diff --git a/tests/async_engine/test_chat_template.py b/tests/async_engine/test_chat_template.py
index 528d6ff182dd..aea8a7fed6e3 100644
--- a/tests/async_engine/test_chat_template.py
+++ b/tests/async_engine/test_chat_template.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-from vllm.entrypoints.openai.chat_utils import load_chat_template
+from vllm.entrypoints.chat_utils import load_chat_template
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
diff --git a/vllm/entrypoints/openai/chat_utils.py b/vllm/entrypoints/chat_utils.py
similarity index 76%
rename from vllm/entrypoints/openai/chat_utils.py
rename to vllm/entrypoints/chat_utils.py
index b3d5ca77ac16..7b5cbbb251b1 100644
--- a/vllm/entrypoints/openai/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1,15 +1,23 @@
 import codecs
 from dataclasses import dataclass, field
 from functools import lru_cache
-from typing import Awaitable, Iterable, List, Optional, TypedDict, cast, final
-
-from openai.types.chat import (ChatCompletionContentPartImageParam,
-                               ChatCompletionContentPartTextParam)
+from typing import Awaitable, Iterable, List, Optional, Union, cast, final
+
+# yapf conflicts with isort for this block
+# yapf: disable
+from openai.types.chat import ChatCompletionContentPartImageParam
+from openai.types.chat import (
+    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam)
+from openai.types.chat import ChatCompletionContentPartTextParam
+from openai.types.chat import (
+    ChatCompletionMessageParam as OpenAIChatCompletionMessageParam)
+# yapf: enable
+# pydantic needs the TypedDict from typing_extensions
+from pydantic import ConfigDict
 from transformers import PreTrainedTokenizer
+from typing_extensions import Required, TypedDict
 
 from vllm.config import ModelConfig
-from vllm.entrypoints.openai.protocol import (ChatCompletionContentPartParam,
-                                              ChatCompletionMessageParam)
 from vllm.logger import init_logger
 from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.utils import async_get_and_parse_image
@@ -17,6 +25,37 @@
 
 logger = init_logger(__name__)
 
+class CustomChatCompletionContentPartParam(TypedDict, total=False):
+    __pydantic_config__ = ConfigDict(extra="allow")  # type: ignore
+
+    type: Required[str]
+    """The type of the content part."""
+
+
+ChatCompletionContentPartParam = Union[OpenAIChatCompletionContentPartParam,
+                                       CustomChatCompletionContentPartParam]
+
+
+class CustomChatCompletionMessageParam(TypedDict, total=False):
+    """Enables custom roles in the Chat Completion API."""
+    role: Required[str]
+    """The role of the message's author."""
+
+    content: Union[str, List[ChatCompletionContentPartParam]]
+    """The contents of the message."""
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the
+    same role.
+    """
+
+
+ChatCompletionMessageParam = Union[OpenAIChatCompletionMessageParam,
+                                   CustomChatCompletionMessageParam]
+
+
 @final  # So that it should be compatible with Dict[str, str]
 class ConversationMessage(TypedDict):
     role: str
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 2faf06119230..212483109a79 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -3,50 +3,16 @@
 import time
 from typing import Any, Dict, List, Literal, Optional, Union
 
-import openai.types.chat
 import torch
 from pydantic import BaseModel, ConfigDict, Field, model_validator
-# pydantic needs the TypedDict from typing_extensions
-from typing_extensions import Annotated, Required, TypedDict
+from typing_extensions import Annotated
 
+from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
 
 
-class CustomChatCompletionContentPartParam(TypedDict, total=False):
-    __pydantic_config__ = ConfigDict(extra="allow")  # type: ignore
-
-    type: Required[str]
-    """The type of the content part."""
-
-
-ChatCompletionContentPartParam = Union[
-    openai.types.chat.ChatCompletionContentPartParam,
-    CustomChatCompletionContentPartParam]
-
-
-class CustomChatCompletionMessageParam(TypedDict, total=False):
-    """Enables custom roles in the Chat Completion API."""
-    role: Required[str]
-    """The role of the message's author."""
-
-    content: Union[str, List[ChatCompletionContentPartParam]]
-    """The contents of the message."""
-
-    name: str
-    """An optional name for the participant.
-
-    Provides the model information to differentiate between participants of the
-    same role.
-    """
-
-
-ChatCompletionMessageParam = Union[
-    openai.types.chat.ChatCompletionMessageParam,
-    CustomChatCompletionMessageParam]
-
-
 class OpenAIBaseModel(BaseModel):
     # OpenAI API does not allow extra fields
     model_config = ConfigDict(extra="forbid")
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 0d7eede377ce..95ca5d080afc 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -9,9 +9,9 @@
 
 from vllm.config import ModelConfig
 from vllm.engine.async_llm_engine import AsyncLLMEngine
-from vllm.entrypoints.openai.chat_utils import (ConversationMessage,
-                                                load_chat_template,
-                                                parse_chat_message_content)
+from vllm.entrypoints.chat_utils import (ConversationMessage,
+                                         load_chat_template,
+                                         parse_chat_message_content)
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionLogProb, ChatCompletionLogProbs,
     ChatCompletionLogProbsContent, ChatCompletionNamedToolChoiceParam,
diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index 94367bd3a604..70a254785eba 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -2,9 +2,9 @@
 
 from vllm.config import ModelConfig
 from vllm.engine.async_llm_engine import AsyncLLMEngine
-from vllm.entrypoints.openai.chat_utils import (ConversationMessage,
-                                                load_chat_template,
-                                                parse_chat_message_content)
+from vllm.entrypoints.chat_utils import (ConversationMessage,
+                                         load_chat_template,
+                                         parse_chat_message_content)
 from vllm.entrypoints.openai.protocol import (DetokenizeRequest,
                                               DetokenizeResponse,
                                               TokenizeRequest,