diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 6e971ae73f5d..beee16d188eb 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -276,6 +276,11 @@ def generate(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.generate() is only supported for generation models "
+                "(XForCausalLM).")
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
@@ -420,6 +425,11 @@ def encode(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if not self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.encode() is only supported for embedding models (XModel)."
+            )
+
        if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
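
With this change, calling the wrong entry point for the loaded model type fails fast with a clear error instead of producing undefined behavior. A minimal usage sketch (the model names below are illustrative assumptions, not part of this diff):

    from vllm import LLM

    # Assumed example of an embedding model; embedding_mode is True for it.
    embed_llm = LLM(model="intfloat/e5-mistral-7b-instruct")
    embed_llm.encode("Hello, world")    # OK
    # embed_llm.generate("Hello, world")  # now raises ValueError

    # Assumed example of a generation model; embedding_mode is False for it.
    gen_llm = LLM(model="facebook/opt-125m")
    gen_llm.generate("Hello, world")    # OK
    # gen_llm.encode("Hello, world")      # now raises ValueError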