From 6f795b4f372c81ef5d0c7aaf5522ebc01f9bc8f2 Mon Sep 17 00:00:00 2001 From: senseb Date: Tue, 7 Jan 2025 11:13:36 +0800 Subject: [PATCH] Fixes #12414 --- .../siliconflow/llm/_position.yaml | 2 + .../llm/qwen2.5-72b-instruct-128k.yaml | 51 +++++++++++++++++++ .../llm/qwen2.5-72b-instruct-vendorA.yaml | 51 +++++++++++++++++++ .../siliconflow/llm/qwen2.5-72b-instruct.yaml | 2 +- 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-128k.yaml create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-vendorA.yaml diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml index a4d1aac10cb46a..8361be91ba2216 100644 --- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml @@ -7,6 +7,8 @@ - Qwen/Qwen2.5-Coder-7B-Instruct - Qwen/Qwen2-VL-72B-Instruct - Qwen/Qwen2-1.5B-Instruct +- Qwen/Qwen2.5-72B-Instruct-128K +- Vendor-A/Qwen/Qwen2.5-72B-Instruct - Pro/Qwen/Qwen2-VL-7B-Instruct - OpenGVLab/InternVL2-26B - Pro/OpenGVLab/InternVL2-8B diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-128k.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-128k.yaml new file mode 100644 index 00000000000000..79f94da3765670 --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-128k.yaml @@ -0,0 +1,51 @@ +model: Qwen/Qwen2.5-72B-Instruct-128K +label: + en_US: Qwen/Qwen2.5-72B-Instruct-128K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '4.13' + output: '4.13' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-vendorA.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-vendorA.yaml new file mode 100644 index 00000000000000..fdbe38ff21043c --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct-vendorA.yaml @@ -0,0 +1,51 @@ +model: Vendor-A/Qwen/Qwen2.5-72B-Instruct +label: + en_US: Vendor-A/Qwen/Qwen2.5-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: max_tokens + use_template: max_tokens + type: int + default: 512 + min: 1 + max: 4096 + help: + zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: frequency_penalty + use_template: frequency_penalty + - name: response_format + label: + zh_Hans: 回复格式 + en_US: Response Format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object +pricing: + input: '1.00' + output: '1.00' + unit: '0.000001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml index c80cd45dd302cd..de9d9d97bfa627 100644 --- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-72b-instruct.yaml @@ -15,7 +15,7 @@ parameter_rules: type: int default: 512 min: 1 - max: 8192 + max: 4096 help: zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.