From 75c92478197e404cad9320d89ee9a1cacd984776 Mon Sep 17 00:00:00 2001
From: Zhijie He
Date: Sun, 1 Sep 2024 22:41:35 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=92=84=20style:=20update=20Groq=20model?=
 =?UTF-8?q?=20list=20&=20add=20`GROQ=5FMODEL=5FLIST`=20support=20(#3716)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 💄 style: update Groq model list & add `GROQ_MODEL_LIST` support

* 💄 style: update Groq model list
---
 Dockerfile                        |  2 +-
 Dockerfile.database               |  2 +-
 src/config/llm.ts                 |  2 ++
 src/config/modelProviders/groq.ts | 55 ++++++++++++++++---------------
 src/server/globalConfig/index.ts  | 13 +++++++-
 5 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 761fe0aa93946..43fc76e605b7d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -119,7 +119,7 @@ ENV \
     # Google
     GOOGLE_API_KEY="" GOOGLE_PROXY_URL="" \
     # Groq
-    GROQ_API_KEY="" GROQ_PROXY_URL="" \
+    GROQ_API_KEY="" GROQ_MODEL_LIST="" GROQ_PROXY_URL="" \
     # Minimax
     MINIMAX_API_KEY="" \
     # Mistral
diff --git a/Dockerfile.database b/Dockerfile.database
index 7299503321af5..37645f9514fcd 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -151,7 +151,7 @@ ENV \
     # Google
     GOOGLE_API_KEY="" GOOGLE_PROXY_URL="" \
     # Groq
-    GROQ_API_KEY="" GROQ_PROXY_URL="" \
+    GROQ_API_KEY="" GROQ_MODEL_LIST="" GROQ_PROXY_URL="" \
     # Minimax
     MINIMAX_API_KEY="" \
     # Mistral
diff --git a/src/config/llm.ts b/src/config/llm.ts
index 8f3777ff2204f..a477eeaf92261 100644
--- a/src/config/llm.ts
+++ b/src/config/llm.ts
@@ -49,6 +49,7 @@ export const getLLMConfig = () => {
 
       ENABLED_GROQ: z.boolean(),
       GROQ_API_KEY: z.string().optional(),
+      GROQ_MODEL_LIST: z.string().optional(),
       GROQ_PROXY_URL: z.string().optional(),
 
       ENABLED_OPENROUTER: z.boolean(),
@@ -153,6 +154,7 @@ export const getLLMConfig = () => {
 
       ENABLED_GROQ: !!process.env.GROQ_API_KEY,
       GROQ_API_KEY: process.env.GROQ_API_KEY,
+      GROQ_MODEL_LIST: process.env.GROQ_MODEL_LIST,
       GROQ_PROXY_URL: process.env.GROQ_PROXY_URL,
 
       ENABLED_ZEROONE: !!process.env.ZEROONE_API_KEY,
diff --git a/src/config/modelProviders/groq.ts b/src/config/modelProviders/groq.ts
index 9c71f3382fc19..1dcdf5d7749c2 100644
--- a/src/config/modelProviders/groq.ts
+++ b/src/config/modelProviders/groq.ts
@@ -1,59 +1,59 @@
 import { ModelProviderCard } from '@/types/llm';
 
 // ref https://console.groq.com/docs/models
+// ref https://console.groq.com/docs/tool-use
 const Groq: ModelProviderCard = {
   chatModels: [
+    // TODO: During preview launch, Groq is limiting 3.1 models to max_tokens of 8k.
     {
-      displayName: 'LLaMA3.1 405B (Preview)',
+      displayName: 'Llama 3.1 8B (Preview)',
+      enabled: true,
       functionCall: true,
-      id: 'llama-3.1-405b-reasoning',
-      tokens: 16_000,
+      id: 'llama-3.1-8b-instant',
+      tokens: 8000,
     },
     {
-      displayName: 'LLaMA 3.1 70B (Preview)',
+      displayName: 'Llama 3.1 70B (Preview)',
       enabled: true,
       functionCall: true,
       id: 'llama-3.1-70b-versatile',
       tokens: 8000,
     },
+    /*
+    // Offline due to overwhelming demand! Stay tuned for updates.
     {
-      displayName: 'LLaMA 3.1 8B (Preview)',
-      enabled: true,
+      displayName: 'Llama 3.1 405B (Preview)',
       functionCall: true,
-      id: 'llama-3.1-8b-instant',
+      id: 'llama-3.1-405b-reasoning',
       tokens: 8000,
     },
+    */
     {
-      displayName: 'LLaMA 3 Groq 70b Tool Use (preview)',
+      displayName: 'Llama 3 Groq 8B Tool Use (Preview)',
       enabled: true,
       functionCall: true,
-      id: 'llama3-groq-70b-8192-tool-use-preview',
+      id: 'llama3-groq-8b-8192-tool-use-preview',
       tokens: 8192,
     },
     {
-      displayName: 'LLaMA 3 Groq 8b Tool Use (preview)',
+      displayName: 'Llama 3 Groq 70B Tool Use (Preview)',
       enabled: true,
       functionCall: true,
-      id: 'llama3-groq-8b-8192-tool-use-preview',
+      id: 'llama3-groq-70b-8192-tool-use-preview',
+      tokens: 8192,
     },
     {
-      displayName: 'LLaMA3 70B',
+      displayName: 'Meta Llama 3 8B',
       enabled: true,
       functionCall: true,
-      id: 'llama3-70b-8192',
+      id: 'llama3-8b-8192',
       tokens: 8192,
     },
     {
-      displayName: 'Mixtral-8x7b',
+      displayName: 'Meta Llama 3 70B',
       enabled: true,
       functionCall: true,
-      id: 'mixtral-8x7b-32768',
-      tokens: 32_768,
-    },
-    {
-      displayName: 'Gemma 7B',
-      functionCall: true,
-      id: 'gemma-7b-it',
+      id: 'llama3-70b-8192',
       tokens: 8192,
     },
     {
@@ -64,16 +64,17 @@ const Groq: ModelProviderCard = {
       tokens: 8192,
     },
     {
-      displayName: 'LLaMA3 8B',
-      enabled: true,
+      displayName: 'Gemma 7B',
       functionCall: true,
-      id: 'llama3-8b-8192',
+      id: 'gemma-7b-it',
       tokens: 8192,
     },
     {
-      displayName: 'LLaMA2-70b-chat',
-      id: 'llama2-70b-4096',
-      tokens: 4096,
+      displayName: 'Mixtral 8x7B',
+      enabled: true,
+      functionCall: true,
+      id: 'mixtral-8x7b-32768',
+      tokens: 32_768,
     },
   ],
   checkModel: 'gemma2-9b-it',
diff --git a/src/server/globalConfig/index.ts b/src/server/globalConfig/index.ts
index 5f31c584ee4cc..6f4a9dd3c8282 100644
--- a/src/server/globalConfig/index.ts
+++ b/src/server/globalConfig/index.ts
@@ -4,6 +4,7 @@ import { fileEnv } from '@/config/file';
 import { langfuseEnv } from '@/config/langfuse';
 import { getLLMConfig } from '@/config/llm';
 import {
+  GroqProviderCard,
   NovitaProviderCard,
   OllamaProviderCard,
   OpenAIProviderCard,
@@ -34,7 +35,10 @@ export const getServerGlobalConfig = () => {
     ENABLED_AWS_BEDROCK,
 
     ENABLED_GOOGLE,
+
     ENABLED_GROQ,
+    GROQ_MODEL_LIST,
+
     ENABLED_DEEPSEEK,
     ENABLED_PERPLEXITY,
     ENABLED_ANTHROPIC,
@@ -99,7 +103,14 @@ export const getServerGlobalConfig = () => {
       bedrock: { enabled: ENABLED_AWS_BEDROCK },
       deepseek: { enabled: ENABLED_DEEPSEEK },
       google: { enabled: ENABLED_GOOGLE },
-      groq: { enabled: ENABLED_GROQ },
+      groq: {
+        enabled: ENABLED_GROQ,
+        enabledModels: extractEnabledModels(GROQ_MODEL_LIST),
+        serverModelCards: transformToChatModelCards({
+          defaultChatModels: GroqProviderCard.chatModels,
+          modelString: GROQ_MODEL_LIST,
+        }),
+      },
       minimax: { enabled: ENABLED_MINIMAX },
       mistral: { enabled: ENABLED_MISTRAL },
       moonshot: { enabled: ENABLED_MOONSHOT },
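
A minimal usage sketch, assuming `GROQ_MODEL_LIST` follows the same model-string conventions as the
other `*_MODEL_LIST` variables (`-all` to drop the defaults, `+id` to enable a model, `-id` to hide
one), which the shared extractEnabledModels / transformToChatModelCards helpers suggest; the values
below are illustrative only:

    # hypothetical .env / `docker run -e` values; model ids come from the card list in this patch,
    # and the -all/+id syntax is an assumption, not something this patch itself defines
    GROQ_API_KEY="<your-groq-api-key>"
    GROQ_MODEL_LIST="-all,+llama-3.1-8b-instant,+llama-3.1-70b-versatile"

With `GROQ_MODEL_LIST` left empty (the Dockerfile default), transformToChatModelCards still receives
GroqProviderCard.chatModels as its defaults, so the stock card list above presumably stays in effect.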