53 lines
2.0 KiB
YAML
53 lines
2.0 KiB
YAML
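# Local / intranet-deployed model profiles
# These use the OpenAI-compatible protocol (litellm provider prefix `openai/`; the part
# after the prefix is passed through to base_url as the actual `model` field).
# For confidential tasks the user explicitly selects local.r1 / local.qwen3 instead of
# the default deepseek_v4.flash; these profiles are never chosen by automatic routing.
# Both variants share the same inference server (same api_base) and the same
# api_key_env, LOCAL_LLM_API_KEY.
# thinking_mode=false: R1 / QwQ are natively reasoning models that think by default and
# are not controlled through reasoning_effort levels (that is the OpenAI / DeepSeek V4
# style). Setting it to true would send a reasoning_effort field, which a local
# vLLM / sglang server most likely does not recognise and would reject with HTTP 400.
# (The second variant is keyed `qwen3` below; its served alias is still `Qwen/QwQ-32B`.)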
family: local

variants:
  # NOTE(review): both variants point at http://182.54.21.126:9000/v1. That address is
  # outside the RFC 1918 private ranges and the scheme is plain http, while this profile
  # is labelled intranet and is meant for confidential tasks. Confirm the host is only
  # reachable on a trusted network, or move it behind TLS.

  r1:
    display_name: DeepSeek-R1 (内网)
    # `openai/` is the litellm provider prefix; the remainder is sent as the model name.
    model_id: openai/DeepSeek-R1
    api_base: http://182.54.21.126:9000/v1
    # Name of the environment variable holding the key, not the key itself.
    api_key_env: LOCAL_LLM_API_KEY
    max_context: 131072
    reliable_context: 65536
    max_output: 8192
    parallel_tools: false
    tool_calling_quality: fair
    # Kept false so that no reasoning_effort field is sent to the local server.
    thinking_mode: false
    reasoning_effort_levels: []
    default_reasoning_effort: ""
    code_quality: good
    enable_run_python: true
    max_iterations: 50
    optimal_temperature: 0.6
    prompt_caching: false
    extended_thinking: false

  qwen3:
    # The server is a router proxy, so the model field must match an alias registered
    # on it. This service registers the name `Qwen/QwQ-32B` (an alias chosen by the
    # deployer), while the backend actually runs Qwen3-30B-A3B (the `model` field of
    # the response echoes the real name).
    # When the alias changes, update only model_id; display_name and the variant key
    # follow the actual backend (the frontend shows the real name).
    display_name: Qwen3-30B-A3B (内网)
    model_id: openai/Qwen/QwQ-32B
    api_base: http://182.54.21.126:9000/v1
    # Name of the environment variable holding the key, not the key itself.
    api_key_env: LOCAL_LLM_API_KEY
    max_context: 32768
    reliable_context: 16384
    max_output: 8192
    parallel_tools: false
    tool_calling_quality: fair
    # Kept false so that no reasoning_effort field is sent to the local server.
    thinking_mode: false
    reasoning_effort_levels: []
    default_reasoning_effort: ""
    code_quality: good
    enable_run_python: true
    max_iterations: 50
    optimal_temperature: 0.6
    prompt_caching: false
    extended_thinking: false