diff --git a/backend/onyx/llm/chat_llm.py b/backend/onyx/llm/chat_llm.py
index 3d1cd9d9659..e7b94af0bcb 100644
--- a/backend/onyx/llm/chat_llm.py
+++ b/backend/onyx/llm/chat_llm.py
@@ -27,6 +27,7 @@
 from onyx.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS
 from onyx.configs.app_configs import MOCK_LLM_RESPONSE
+from onyx.configs.chat_configs import QA_TIMEOUT
 from onyx.configs.model_configs import (
     DISABLE_LITELLM_STREAMING,
 )
@@ -35,6 +36,7 @@
 from onyx.llm.interfaces import LLM
 from onyx.llm.interfaces import LLMConfig
 from onyx.llm.interfaces import ToolChoiceOptions
+from onyx.llm.utils import model_is_reasoning_model
 from onyx.server.utils import mask_string
 from onyx.utils.logger import setup_logger
 from onyx.utils.long_term_log import LongTermLogger
@@ -229,15 +231,15 @@ class DefaultMultiLLM(LLM):
     def __init__(
         self,
         api_key: str | None,
-        timeout: int,
         model_provider: str,
         model_name: str,
+        timeout: int | None = None,
         api_base: str | None = None,
         api_version: str | None = None,
         deployment_name: str | None = None,
         max_output_tokens: int | None = None,
         custom_llm_provider: str | None = None,
-        temperature: float = GEN_AI_TEMPERATURE,
+        temperature: float | None = None,
         custom_config: dict[str, str] | None = None,
         extra_headers: dict[str, str] | None = None,
         extra_body: dict | None = LITELLM_EXTRA_BODY,
@@ -245,9 +247,16 @@ def __init__(
         long_term_logger: LongTermLogger | None = None,
     ):
         self._timeout = timeout
+        if timeout is None:
+            if model_is_reasoning_model(model_name):
+                self._timeout = QA_TIMEOUT * 10  # Reasoning models are slow
+            else:
+                self._timeout = QA_TIMEOUT
+
+        self._temperature = GEN_AI_TEMPERATURE if temperature is None else temperature
+
         self._model_provider = model_provider
         self._model_version = model_name
-        self._temperature = temperature
         self._api_key = api_key
         self._deployment_name = deployment_name
         self._api_base = api_base
diff --git a/backend/onyx/llm/factory.py b/backend/onyx/llm/factory.py
index 8e720a756b8..dff83e07f33 100644
--- a/backend/onyx/llm/factory.py
+++ b/backend/onyx/llm/factory.py
@@ -2,7 +2,6 @@
 from onyx.chat.models import PersonaOverrideConfig
 from onyx.configs.app_configs import DISABLE_GENERATIVE_AI
-from onyx.configs.chat_configs import QA_TIMEOUT
 from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
 from onyx.configs.model_configs import GEN_AI_TEMPERATURE
 from onyx.db.engine import get_session_context_manager
@@ -88,8 +87,8 @@ def _create_llm(model: str) -> LLM:
 def get_default_llms(
-    timeout: int = QA_TIMEOUT,
-    temperature: float = GEN_AI_TEMPERATURE,
+    timeout: int | None = None,
+    temperature: float | None = None,
     additional_headers: dict[str, str] | None = None,
     long_term_logger: LongTermLogger | None = None,
 ) -> tuple[LLM, LLM]:
@@ -138,7 +137,7 @@ def get_llm(
     api_version: str | None = None,
     custom_config: dict[str, str] | None = None,
     temperature: float | None = None,
-    timeout: int = QA_TIMEOUT,
+    timeout: int | None = None,
     additional_headers: dict[str, str] | None = None,
     long_term_logger: LongTermLogger | None = None,
 ) -> LLM:
diff --git a/backend/onyx/llm/llm_provider_options.py b/backend/onyx/llm/llm_provider_options.py
index 409d409a837..3767552238a 100644
--- a/backend/onyx/llm/llm_provider_options.py
+++ b/backend/onyx/llm/llm_provider_options.py
@@ -29,11 +29,11 @@ class WellKnownLLMProviderDescriptor(BaseModel):
 OPEN_AI_MODEL_NAMES = [
     "o3-mini",
     "o1-mini",
-    "o1-preview",
-    "o1-2024-12-17",
+    "o1",
     "gpt-4",
     "gpt-4o",
     "gpt-4o-mini",
+    "o1-preview",
     "gpt-4-turbo",
     "gpt-4-turbo-preview",
     "gpt-4-1106-preview",
diff --git a/backend/onyx/llm/utils.py b/backend/onyx/llm/utils.py
index 48c6e12ef22..04fc2260c74 100644
--- a/backend/onyx/llm/utils.py
+++ b/backend/onyx/llm/utils.py
@@ -543,3 +543,14 @@ def model_supports_image_input(model_name: str, model_provider: str) -> bool:
             f"Failed to get model object for {model_provider}/{model_name}"
         )
         return False
+
+
+def model_is_reasoning_model(model_name: str) -> bool:
+    _REASONING_MODEL_NAMES = [
+        "o1",
+        "o1-mini",
+        "o3-mini",
+        "deepseek-reasoner",
+        "deepseek-r1",
+    ]
+    return model_name.lower() in _REASONING_MODEL_NAMES
diff --git a/web/src/lib/hooks.ts b/web/src/lib/hooks.ts
index 8005b707fcc..ff09b77035a 100644
--- a/web/src/lib/hooks.ts
+++ b/web/src/lib/hooks.ts
@@ -647,11 +647,11 @@ export const useUserGroups = (): {
 const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
   // OpenAI models
-  "o1-2025-12-17": "O1 (December 2025)",
-  "o3-mini": "O3 Mini",
-  "o1-mini": "O1 Mini",
-  "o1-preview": "O1 Preview",
-  o1: "O1",
+  "o1-2025-12-17": "o1 (December 2025)",
+  "o3-mini": "o3 Mini",
+  "o1-mini": "o1 Mini",
+  "o1-preview": "o1 Preview",
+  o1: "o1",
   "gpt-4": "GPT 4",
   "gpt-4o": "GPT 4o",
   "gpt-4o-2024-08-06": "GPT 4o (Structured Outputs)",
@@ -753,14 +753,7 @@ export function getDisplayNameForModel(modelName: string): string {
 }
 export const defaultModelsByProvider: { [name: string]: string[] } = {
-  openai: [
-    "gpt-4",
-    "gpt-4o",
-    "gpt-4o-mini",
-    "o3-mini",
-    "o1-mini",
-    "o1-preview",
-  ],
+  openai: ["gpt-4", "gpt-4o", "gpt-4o-mini", "o3-mini", "o1-mini", "o1"],
   bedrock: [
     "meta.llama3-1-70b-instruct-v1:0",
     "meta.llama3-1-8b-instruct-v1:0",