From 043dd1f3f4de18726b210986a80199869098b58f Mon Sep 17 00:00:00 2001 From: Roger Wang <136131678+ywang96@users.noreply.github.com> Date: Wed, 13 Nov 2024 03:02:56 -0800 Subject: [PATCH] [V1] Add missing tokenizer options for `Detokenizer` (#10288) Signed-off-by: Roger Wang --- vllm/v1/engine/detokenizer.py | 11 +++++++++-- vllm/v1/engine/llm_engine.py | 7 ++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py index 1dbf8e75ec478..6249d60199a62 100644 --- a/vllm/v1/engine/detokenizer.py +++ b/vllm/v1/engine/detokenizer.py @@ -192,10 +192,17 @@ def _get_next_output_text(self, finished: bool, delta: bool) -> str: class Detokenizer: - def __init__(self, tokenizer_name: str): + def __init__(self, + tokenizer_name: str, + tokenizer_mode: str = "auto", + trust_remote_code: bool = False, + revision: Optional[str] = None): # TODO: once we support LoRA, we should should pass the tokenizer # here. We currently have two copies (this + in the LLMEngine). - self.tokenizer = get_tokenizer(tokenizer_name) + self.tokenizer = get_tokenizer(tokenizer_name=tokenizer_name, + tokenizer_mode=tokenizer_mode, + trust_remote_code=trust_remote_code, + revision=revision) # Request id -> IncrementalDetokenizer self.request_states: Dict[str, IncrementalDetokenizer] = {} diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index f37db92e8ea6b..5b45615a1b85b 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -53,7 +53,12 @@ def __init__( input_registry) # Detokenizer (converts EngineCoreOutputs --> RequestOutput) - self.detokenizer = Detokenizer(vllm_config.model_config.tokenizer) + self.detokenizer = Detokenizer( + tokenizer_name=vllm_config.model_config.tokenizer, + tokenizer_mode=vllm_config.model_config.tokenizer_mode, + trust_remote_code=vllm_config.model_config.trust_remote_code, + revision=vllm_config.model_config.tokenizer_revision, + ) # EngineCore (gets EngineCoreRequests and gives EngineCoreOutputs) self.engine_core = EngineCoreClient.make_client(