
[BugFix] Prevent LLM.encode for non-generation Models (vllm-project#5184)

Co-authored-by: mgoin <michael@neuralmagic.com>
2 people authored and jimpang committed Jun 27, 2024
1 parent 8615094 commit 01cc533
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions vllm/entrypoints/llm.py
@@ -276,6 +276,11 @@ def generate(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.generate() is only supported for generation models "
+                "(XForCausalLM).")
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
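
For reference, here is a minimal, self-contained sketch of the guard pattern this addition follows: the entrypoint consults the engine's model config and refuses to run in the wrong mode. ModelConfig, LLMEngine and LLM below are simplified stand-ins, not vLLM's real classes; only the embedding_mode attribute is modeled.

# Simplified stand-ins for illustration only; the real classes live in vLLM itself.
class ModelConfig:
    def __init__(self, embedding_mode: bool) -> None:
        self.embedding_mode = embedding_mode

class LLMEngine:
    def __init__(self, model_config: ModelConfig) -> None:
        self.model_config = model_config

class LLM:
    def __init__(self, embedding_mode: bool) -> None:
        self.llm_engine = LLMEngine(ModelConfig(embedding_mode))

    def generate(self, prompt: str) -> str:
        # Same check as in the diff: generation only makes sense for *ForCausalLM models.
        if self.llm_engine.model_config.embedding_mode:
            raise ValueError(
                "LLM.generate() is only supported for generation models "
                "(XForCausalLM).")
        return f"(generated continuation of {prompt!r})"

# An embedding-mode model now fails fast instead of producing meaningless output.
try:
    LLM(embedding_mode=True).generate("Hello")
except ValueError as err:
    print(err)
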
@@ -420,6 +425,11 @@ def encode(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if not self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.encode() is only supported for embedding models (XModel)."
+            )
+
         if prompt_token_ids is not None or multi_modal_data is not None:
            inputs = self._convert_v1_inputs(
                prompts=cast(Optional[Union[str, List[str]]], prompts),
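
Taken together, the two checks make mismatched calls fail fast with a clear error rather than misbehaving further down the pipeline. A hypothetical usage sketch against vLLM itself follows; the model name is illustrative and assumed to load in embedding mode, and an actual run requires the model weights and suitable hardware.

from vllm import LLM

# Assumption: this checkpoint loads in embedding mode, so generate() is rejected.
embedding_llm = LLM(model="intfloat/e5-mistral-7b-instruct")

try:
    embedding_llm.generate("Hello, world!")
except ValueError as err:
    print(err)  # LLM.generate() is only supported for generation models (XForCausalLM).

# Symmetrically, calling encode() on a generation model (an *ForCausalLM checkpoint)
# now raises: "LLM.encode() is only supported for embedding models (XModel)."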
