diff --git a/tests/async_engine/test_chat_template.py b/tests/async_engine/test_chat_template.py
index 528d6ff182dd..aea8a7fed6e3 100644
--- a/tests/async_engine/test_chat_template.py
+++ b/tests/async_engine/test_chat_template.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-from vllm.entrypoints.openai.chat_utils import load_chat_template
+from vllm.entrypoints.chat_utils import load_chat_template
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
diff --git a/vllm/entrypoints/openai/chat_utils.py b/vllm/entrypoints/chat_utils.py
similarity index 76%
rename from vllm/entrypoints/openai/chat_utils.py
rename to vllm/entrypoints/chat_utils.py
index b3d5ca77ac16..7b5cbbb251b1 100644
--- a/vllm/entrypoints/openai/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1,15 +1,23 @@
 import codecs
 from dataclasses import dataclass, field
 from functools import lru_cache
-from typing import Awaitable, Iterable, List, Optional, TypedDict, cast, final
-
-from openai.types.chat import (ChatCompletionContentPartImageParam,
-                               ChatCompletionContentPartTextParam)
+from typing import Awaitable, Iterable, List, Optional, Union, cast, final
+
+# yapf conflicts with isort for this block
+# yapf: disable
+from openai.types.chat import ChatCompletionContentPartImageParam
+from openai.types.chat import (
+    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam)
+from openai.types.chat import ChatCompletionContentPartTextParam
+from openai.types.chat import (
+    ChatCompletionMessageParam as OpenAIChatCompletionMessageParam)
+# yapf: enable
+# pydantic needs the TypedDict from typing_extensions
+from pydantic import ConfigDict
 from transformers import PreTrainedTokenizer
+from typing_extensions import Required, TypedDict
 
 from vllm.config import ModelConfig
-from vllm.entrypoints.openai.protocol import (ChatCompletionContentPartParam,
-                                              ChatCompletionMessageParam)
 from vllm.logger import init_logger
 from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.utils import async_get_and_parse_image
@@ -17,6 +25,37 @@
 
 logger = init_logger(__name__)
 
+class CustomChatCompletionContentPartParam(TypedDict, total=False):
+    __pydantic_config__ = ConfigDict(extra="allow")  # type: ignore
+
+    type: Required[str]
+    """The type of the content part."""
+
+
+ChatCompletionContentPartParam = Union[OpenAIChatCompletionContentPartParam,
+                                       CustomChatCompletionContentPartParam]
+
+
+class CustomChatCompletionMessageParam(TypedDict, total=False):
+    """Enables custom roles in the Chat Completion API."""
+    role: Required[str]
+    """The role of the message's author."""
+
+    content: Union[str, List[ChatCompletionContentPartParam]]
+    """The contents of the message."""
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the
+    same role.
+    """
+
+
+ChatCompletionMessageParam = Union[OpenAIChatCompletionMessageParam,
+                                   CustomChatCompletionMessageParam]
+
+
 @final  # So that it should be compatible with Dict[str, str]
 class ConversationMessage(TypedDict):
     role: str
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 2faf06119230..212483109a79 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -3,50 +3,16 @@
 import time
 from typing import Any, Dict, List, Literal, Optional, Union
 
-import openai.types.chat
 import torch
 from pydantic import BaseModel, ConfigDict, Field, model_validator
-# pydantic needs the TypedDict from typing_extensions
-from typing_extensions import Annotated, Required, TypedDict
+from typing_extensions import Annotated
 
+from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
 
 
-class CustomChatCompletionContentPartParam(TypedDict, total=False):
-    __pydantic_config__ = ConfigDict(extra="allow")  # type: ignore
-
-    type: Required[str]
-    """The type of the content part."""
-
-
-ChatCompletionContentPartParam = Union[
-    openai.types.chat.ChatCompletionContentPartParam,
-    CustomChatCompletionContentPartParam]
-
-
-class CustomChatCompletionMessageParam(TypedDict, total=False):
-    """Enables custom roles in the Chat Completion API."""
-    role: Required[str]
-    """The role of the message's author."""
-
-    content: Union[str, List[ChatCompletionContentPartParam]]
-    """The contents of the message."""
-
-    name: str
-    """An optional name for the participant.
-
-    Provides the model information to differentiate between participants of the
-    same role.
-    """
-
-
-ChatCompletionMessageParam = Union[
-    openai.types.chat.ChatCompletionMessageParam,
-    CustomChatCompletionMessageParam]
-
-
 class OpenAIBaseModel(BaseModel):
     # OpenAI API does not allow extra fields
     model_config = ConfigDict(extra="forbid")
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 0d7eede377ce..95ca5d080afc 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -9,9 +9,9 @@
 
 from vllm.config import ModelConfig
 from vllm.engine.async_llm_engine import AsyncLLMEngine
-from vllm.entrypoints.openai.chat_utils import (ConversationMessage,
-                                                load_chat_template,
-                                                parse_chat_message_content)
+from vllm.entrypoints.chat_utils import (ConversationMessage,
+                                         load_chat_template,
+                                         parse_chat_message_content)
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionLogProb, ChatCompletionLogProbs,
     ChatCompletionLogProbsContent, ChatCompletionNamedToolChoiceParam,
diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index 94367bd3a604..70a254785eba 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -2,9 +2,9 @@
 
 from vllm.config import ModelConfig
 from vllm.engine.async_llm_engine import AsyncLLMEngine
-from vllm.entrypoints.openai.chat_utils import (ConversationMessage,
-                                                load_chat_template,
-                                                parse_chat_message_content)
+from vllm.entrypoints.chat_utils import (ConversationMessage,
+                                         load_chat_template,
+                                         parse_chat_message_content)
 from vllm.entrypoints.openai.protocol import (DetokenizeRequest,
                                               DetokenizeResponse,
                                               TokenizeRequest,