diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 6e971ae73f5d..beee16d188eb 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -276,6 +276,11 @@ def generate(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.generate() is only supported for generation models "
+                "(XForCausalLM).")
+
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
@@ -420,6 +425,11 @@ def encode(
             considered legacy and may be deprecated in the future. You should
             instead pass them via the ``inputs`` parameter.
         """
+        if not self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.encode() is only supported for embedding models (XModel)."
+            )
+
        if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
                 prompts=cast(Optional[Union[str, List[str]]], prompts),
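
With this change, calling the wrong entry point for the loaded model type fails fast with a clear error instead of producing undefined behavior. A minimal usage sketch (the model names below are illustrative assumptions, not part of this diff):

    from vllm import LLM

    # Assumed example of an embedding model; embedding_mode is True for it.
    embed_llm = LLM(model="intfloat/e5-mistral-7b-instruct")
    embed_llm.encode("Hello, world")    # OK
    # embed_llm.generate("Hello, world")  # now raises ValueError

    # Assumed example of a generation model; embedding_mode is False for it.
    gen_llm = LLM(model="facebook/opt-125m")
    gen_llm.generate("Hello, world")    # OK
    # gen_llm.encode("Hello, world")      # now raises ValueError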