[Rel Eng] Upstream sync 2024 06 11 #298

Merged: 93 commits on Jun 11, 2024

Commits
4b41095
[CI/Build] CMakeLists: build all extensions' cmake targets at the sam…
dtrifiro Jun 1, 2024
045812f
[Kernel] Refactor CUTLASS kernels to always take scales that reside o…
tlrmchlsmth Jun 1, 2024
db09745
[Kernel] Update Cutlass fp8 configs (#5144)
varun-sundar-rabindranath Jun 1, 2024
46b6b26
[Minor] Fix the path typo in loader.py: save_sharded_states.py -> sav…
dashanji Jun 1, 2024
5b5c2b9
[Bugfix] Fix call to init_logger in openai server (#4765)
NadavShmayo Jun 1, 2024
cb6b7a0
[Feature][Kernel] Support bitsandbytes quantization and QLoRA (#4776)
chenqianfzh Jun 1, 2024
9c2a759
[Bugfix] Remove deprecated @abstractproperty (#5174)
zhuohan123 Jun 1, 2024
fd82eff
[Bugfix]: Fix issues related to prefix caching example (#5177) (#5180)
Delviet Jun 1, 2024
5b6b8ed
[BugFix] Prevent `LLM.encode` for non-generation Models (#5184)
robertgshaw2-redhat Jun 1, 2024
15650a3
Update test_ignore_eos (#4898)
simon-mo Jun 2, 2024
dc64b07
[Frontend][OpenAI] Support for returning max_model_len on /v1/models …
Avinash-Raj Jun 2, 2024
bfc6bc7
[Kernel][ROCm][AMD] enable fused topk_softmax kernel for moe layer (#…
divakar-amd Jun 2, 2024
5008643
[Misc] Simplify code and fix type annotations in `conftest.py` (#5118)
DarkLight1337 Jun 2, 2024
c070e44
[Core] Support image processor (#4197)
DarkLight1337 Jun 3, 2024
314398c
[Core] Remove unnecessary copies in flash attn backend (#5138)
Yard1 Jun 3, 2024
1ebb772
[Kernel] Pass a device pointer into the quantize kernel for the scale…
tlrmchlsmth Jun 3, 2024
48e8e3f
[CI/BUILD] enable intel queue for longer CPU tests (#4113)
zhouyuan Jun 3, 2024
a6f0725
[Misc]: Implement CPU/GPU swapping in BlockManagerV2 (#3834)
Kaiyang-Chen Jun 3, 2024
198d784
New CI template on AWS stack (#5110)
khluu Jun 3, 2024
1923dcb
[FRONTEND] OpenAI `tools` support named functions (#5032)
br3no Jun 3, 2024
fa0bba2
[Bugfix] Support `prompt_logprobs==0` (#5217)
toslunar Jun 4, 2024
d8b71e3
[Bugfix] Add warmup for prefix caching example (#5235)
zhuohan123 Jun 4, 2024
1d88071
[Kernel] Enhance MoE benchmarking & tuning script (#4921)
WoosukKwon Jun 4, 2024
7899055
[Bugfix]: During testing, use pytest monkeypatch for safely overridin…
afeldman-nm Jun 4, 2024
0e8a84d
[Bugfix] Fix torch.compile() error when using MultiprocessingGPUExecu…
zifeitong Jun 4, 2024
88368d3
[CI/Build] Add inputs tests (#5215)
DarkLight1337 Jun 4, 2024
756340a
[Bugfix] Fix a bug caused by pip install setuptools>=49.4.0 for CPU b…
DamonFool Jun 4, 2024
789553f
[Kernel] Add back batch size 1536 and 3072 to MoE tuning (#5242)
WoosukKwon Jun 4, 2024
c57b71e
[CI/Build] Simplify model loading for `HfRunner` (#5251)
DarkLight1337 Jun 4, 2024
14ec8df
[CI/Build] Reducing CPU CI execution time (#5241)
bigPYJ1151 Jun 4, 2024
3b6f9d6
[CI] mark AMD test as softfail to prevent blockage (#5256)
simon-mo Jun 4, 2024
06bcc97
[Misc] Add transformers version to collect_env.py (#5259)
mgoin Jun 4, 2024
c3a46dd
[Misc] update collect env (#5261)
youkaichao Jun 4, 2024
c6bcf66
[Bugfix] Fix prompt_logprobs when SamplingParams.detokenize is set to…
zifeitong Jun 5, 2024
f5d9197
[Misc] Add CustomOp interface for device portability (#5255)
WoosukKwon Jun 5, 2024
bbfee0c
[Misc] Fix docstring of get_attn_backend (#5271)
WoosukKwon Jun 5, 2024
47c1256
[Frontend] OpenAI API server: Add `add_special_tokens` to ChatComplet…
tomeras91 Jun 5, 2024
d619bd9
[CI] Add nightly benchmarks (#5260)
simon-mo Jun 5, 2024
2cf5911
[misc] benchmark_serving.py -- add ITL results and tweak TPOT results…
tlrmchlsmth Jun 5, 2024
8f5fafa
[Kernel] Add GPU architecture guards to the CUTLASS w8a8 kernels to r…
tlrmchlsmth Jun 5, 2024
0770930
[Model] Correct Mixtral FP8 checkpoint loading (#5231)
comaniac Jun 5, 2024
8310e34
[BugFix] Apply get_cached_tokenizer to the tokenizer setter of LLM (#…
DriverSong Jun 5, 2024
6e32dd4
[Kernel] Re-tune Mixtral MoE configurations for FP8 on H100 (#5238)
pcmoritz Jun 5, 2024
c2c62c8
[Docs] Add Sequoia as sponsors (#5287)
simon-mo Jun 5, 2024
ee3104b
[Speculative Decoding] Add `ProposerWorkerBase` abstract class (#5252)
njhill Jun 5, 2024
1680d99
[BugFix] Fix log message about default max model length (#5284)
njhill Jun 5, 2024
efb32e1
[Bugfix] Make EngineArgs use named arguments for config construction …
mgoin Jun 5, 2024
9a28c64
[Bugfix][Frontend/Core] Don't log exception when AsyncLLMEngine grace…
wuisawesome Jun 5, 2024
2b27f72
[Misc] Skip for logits_scale == 1.0 (#5291)
WoosukKwon Jun 5, 2024
54d2690
[Docs] Add Ray Summit CFP (#5295)
simon-mo Jun 5, 2024
cc2aaba
[CI] Disable flash_attn backend for spec decode (#5286)
simon-mo Jun 5, 2024
d72ae5b
[Frontend][Core] Update Outlines Integration from `FSM` to `Guide` (#…
br3no Jun 5, 2024
08fd788
[CI/Build] Update vision tests (#5307)
DarkLight1337 Jun 6, 2024
cbfd3d9
Bugfix: fix broken of download models from modelscope (#5233)
liuyhwangyh Jun 6, 2024
7bb7e9b
[Kernel] Retune Mixtral 8x22b configs for FP8 on H100 (#5294)
pcmoritz Jun 6, 2024
fbd60f3
[Frontend] enable passing multiple LoRA adapters at once to generate(…
mgoldey Jun 6, 2024
14a49c2
[Core] Avoid copying prompt/output tokens if no penalties are used (#…
Yard1 Jun 7, 2024
a60515d
[Core] Change LoRA embedding sharding to support loading methods (#5038)
Yard1 Jun 7, 2024
653a080
[Misc] Missing error message for custom ops import (#5282)
DamonFool Jun 7, 2024
219a385
[Feature][Frontend]: Add support for `stream_options` in `ChatComplet…
Etelis Jun 7, 2024
bd66622
[Misc][Utils] allow get_open_port to be called for multiple times (#5…
youkaichao Jun 7, 2024
ed99ec9
[Kernel] Switch fp8 layers to use the CUTLASS kernels (#5183)
tlrmchlsmth Jun 7, 2024
50520b4
Remove Ray health check (#4693)
Yard1 Jun 7, 2024
98744f9
Addition of lacked ignored_seq_groups in _schedule_chunked_prefill (#…
JamesLim-sy Jun 7, 2024
334e0a7
[Kernel] Dynamic Per-Token Activation Quantization (#5037)
dsikka Jun 7, 2024
17984a7
[Frontend] Add OpenAI Vision API Support (#5237)
ywang96 Jun 7, 2024
3da0119
[Misc] Remove unused cuda_utils.h in CPU backend (#5345)
DamonFool Jun 7, 2024
d65c3ab
fix DbrxFusedNormAttention missing cache_config (#5340)
Calvinnncy97 Jun 7, 2024
e349c2d
[Bug Fix] Fix the support check for FP8 CUTLASS (#5352)
cli99 Jun 8, 2024
4d5b699
[Misc] Add args for selecting distributed executor to benchmarks (#5335)
BKitor Jun 8, 2024
f12b636
[ROCm][AMD] Use pytorch sdpa math backend to do naive attention (#4965)
hongxiayang Jun 8, 2024
842974c
[CI/Test] improve robustness of test (hf_runner) (#5347)
youkaichao Jun 8, 2024
2a16c03
[CI/Test] improve robustness of test (vllm_runner) (#5357)
youkaichao Jun 8, 2024
f8fe956
[Misc][Breaking] Change FP8 checkpoint format from act_scale -> input…
mgoin Jun 8, 2024
550ed83
[Core][CUDA Graph] add output buffer for cudagraph (#5074)
youkaichao Jun 9, 2024
52a90dd
[mis][ci/test] fix flaky test in test_sharded_state_loader.py (#5361)
youkaichao Jun 9, 2024
d20586a
[Kernel][Misc] Use TORCH_LIBRARY instead of PYBIND11_MODULE for custo…
bnellnm Jun 9, 2024
27e68e9
[Bugfix] Fix KeyError: 1 When Using LoRA adapters (#5164)
BlackBird-Coding Jun 9, 2024
8f865f6
[Misc] Update to comply with the new `compressed-tensors` config (#5350)
dsikka Jun 10, 2024
d3bd135
[Frontend][Misc] Enforce Pixel Values as Input Type for VLMs in API S…
ywang96 Jun 10, 2024
b21be06
[misc][typo] fix typo (#5372)
youkaichao Jun 10, 2024
1b41d11
[Misc] Improve error message when LoRA parsing fails (#5194)
DarkLight1337 Jun 10, 2024
f932e32
[Model] Initial support for LLaVA-NeXT (#4199)
DarkLight1337 Jun 10, 2024
e3f0b32
[Feature][Frontend]: Continued `stream_options` implementation also …
Etelis Jun 10, 2024
f8392d6
[Bugfix] Fix LLaVA-NeXT (#5380)
DarkLight1337 Jun 10, 2024
9d82433
[ci] Use small_cpu_queue for doc build (#5331)
khluu Jun 10, 2024
a9bd95b
[ci] Mount buildkite agent on Docker container to upload benchmark re…
khluu Jun 10, 2024
6823d9e
[Docs] Add Docs on Limitations of VLM Support (#5383)
ywang96 Jun 10, 2024
ca0ae3c
[Docs] Alphabetically sort sponsors (#5386)
WoosukKwon Jun 10, 2024
16be761
Bump version to v0.5.0 (#5384)
simon-mo Jun 10, 2024
1444822
format
Jun 11, 2024
2df326f
updated test model logprobs
Jun 11, 2024
446a144
format
Jun 11, 2024
[BugFix] Prevent `LLM.encode` for non-generation Models (vllm-project#5184)

Co-authored-by: mgoin <michael@neuralmagic.com>
2 people authored and Robert Shaw committed Jun 11, 2024
commit 5b6b8ed2123049c568b743fb1ed7a441cba1e759
vllm/entrypoints/llm.py (10 additions, 0 deletions)
@@ -285,6 +285,11 @@ def generate(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.generate() is only supported for generation models "
+                "(XForCausalLM).")
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
@@ -429,6 +434,11 @@ def encode(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if not self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.encode() is only supported for embedding models (XModel)."
+            )
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
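
For reference, a minimal usage sketch of the guards this commit introduces. The model name and prompt below are illustrative placeholders, not part of the diff, and the sketch assumes the vLLM v0.5.0-era `LLM` API:

    from vllm import LLM

    # Generation model (placeholder name): generate() works as before,
    # while encode() now fails fast instead of producing bogus output.
    llm = LLM(model="facebook/opt-125m")
    outputs = llm.generate(["Hello, my name is"])  # OK

    try:
        llm.encode(["Hello, my name is"])
    except ValueError as err:
        # "LLM.encode() is only supported for embedding models (XModel)."
        print(err)

Conversely, an `LLM` constructed with an embedding model (`embedding_mode` set) would reject `generate()` with the matching error message.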