Fixes #12414

langgenius · Jan 7, 2025 · 6f795b4
1 parent 2bb521b
commit 6f795b4
Show file tree

Hide file tree

Showing 4 changed files with 105 additions and 1 deletion.
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -7,6 +7,8 @@
 - Qwen/Qwen2.5-Coder-7B-Instruct
 - Qwen/Qwen2-VL-72B-Instruct
 - Qwen/Qwen2-1.5B-Instruct
+- Qwen/Qwen2.5-72B-Instruct-128K
+- Vendor-A/Qwen/Qwen2.5-72B-Instruct
 - Pro/Qwen/Qwen2-VL-7B-Instruct
 - OpenGVLab/InternVL2-26B
 - Pro/OpenGVLab/InternVL2-8B

diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-128k.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-128k.yaml
@@ -0,0 +1,51 @@
+model: Qwen/Qwen2.5-72B-Instruct-128K
+label:
+  en_US: Qwen/Qwen2.5-72B-Instruct-128K
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-vendorA.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-vendorA.yaml
@@ -0,0 +1,51 @@
+model: Vendor-A/Qwen/Qwen2.5-72B-Instruct
+label:
+  en_US: Vendor-A/Qwen/Qwen2.5-72B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '1.00'
+  output: '1.00'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml
@@ -15,7 +15,7 @@ parameter_rules:
     type: int
     default: 512
     min: 1
-    max: 8192
+    max: 4096
     help:
       zh_Hans: 指定生成结果长度的上限。如果生成结果截断，可以调大该参数。
       en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.