From d36ba548d36959b546937981037de6edcbc18e9b Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 1 Mar 2024 14:03:11 +0000 Subject: [PATCH 01/10] Add truncate_input_tokens to openai completions api --- vllm/entrypoints/openai/protocol.py | 2 ++ vllm/entrypoints/openai/serving_completion.py | 4 ++-- vllm/entrypoints/openai/serving_engine.py | 20 +++++++++++++++---- vllm/sampling_params.py | 8 +++++++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 26499b8d7a66..bbb732c9ba70 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -183,6 +183,7 @@ class CompletionRequest(BaseModel): guided_json: Optional[Union[str, dict, BaseModel]] = None guided_regex: Optional[str] = None guided_choice: Optional[List[str]] = None + truncate_input_tokens: Optional[int] = None def to_sampling_params(self): echo_without_generation = self.echo and self.max_tokens == 0 @@ -225,6 +226,7 @@ def logit_bias_logits_processor( include_stop_str_in_output=self.include_stop_str_in_output, length_penalty=self.length_penalty, logits_processors=logits_processors, + truncate_input_tokens=self.truncate_input_tokens, ) @model_validator(mode="before") diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 713e67793b29..c3037d109667 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -300,10 +300,10 @@ async def create_completion(self, request: CompletionRequest, for i, prompt in enumerate(prompts): if prompt_is_tokens: input_ids = self._validate_prompt_and_tokenize( - request, prompt_ids=prompt) + request, prompt_ids=prompt, truncate_input_tokens=sampling_params.truncate_input_tokens) else: input_ids = self._validate_prompt_and_tokenize( - request, prompt=prompt) + request, prompt=prompt, truncate_input_tokens=sampling_params.truncate_input_tokens) generators.append( self.engine.generate(None, diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 09945471e9af..64b244094adb 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -62,7 +62,8 @@ async def _post_init(self): self.tokenizer = get_tokenizer( engine_model_config.tokenizer, tokenizer_mode=engine_model_config.tokenizer_mode, - trust_remote_code=engine_model_config.trust_remote_code) + trust_remote_code=engine_model_config.trust_remote_code, + truncation_side="left") async def show_available_models(self) -> ModelList: """Show available models. 
Right now we only have one model.""" @@ -147,15 +148,26 @@ def _validate_prompt_and_tokenize( self, request: Union[ChatCompletionRequest, CompletionRequest], prompt: Optional[str] = None, - prompt_ids: Optional[List[int]] = None) -> List[int]: + prompt_ids: Optional[List[int]] = None, + truncate_input_tokens: Optional[int] = None) -> List[int]: if not (prompt or prompt_ids): raise ValueError("Either prompt or prompt_ids should be provided.") if (prompt and prompt_ids): raise ValueError( "Only one of prompt or prompt_ids should be provided.") - input_ids = prompt_ids if prompt_ids is not None else self.tokenizer( - prompt).input_ids + if prompt_ids is None: + tokenizer_kwargs = {} + if truncate_input_tokens is not None: + tokenizer_kwargs["truncation"] = True + tokenizer_kwargs["max_length"] = truncate_input_tokens + input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids + else: + if truncate_input_tokens is not None: + input_ids = prompt_ids[-truncate_input_tokens:] + else: + input_ids = prompt_ids + token_num = len(input_ids) if request.max_tokens is None: diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 8103f3c2b24b..912fb51cd966 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -91,6 +91,7 @@ class SamplingParams: tokens in the output. Defaults to True. logits_processors: List of functions that modify logits based on previously generated tokens. + truncate_input_tokens: Truncate input tokens using left-truncation. """ def __init__( @@ -118,6 +119,7 @@ def __init__( skip_special_tokens: bool = True, spaces_between_special_tokens: bool = True, logits_processors: Optional[List[LogitsProcessor]] = None, + truncate_input_tokens: Optional[int] = None, ) -> None: self.n = n self.best_of = best_of if best_of is not None else n @@ -150,6 +152,7 @@ def __init__( self.spaces_between_special_tokens = spaces_between_special_tokens self.logits_processors = logits_processors self.include_stop_str_in_output = include_stop_str_in_output + self.truncate_input_tokens = truncate_input_tokens self._verify_args() if self.use_beam_search: self._verify_beam_search() @@ -197,6 +200,8 @@ def _verify_args(self) -> None: if self.prompt_logprobs is not None and self.prompt_logprobs < 0: raise ValueError(f"prompt_logprobs must be non-negative, got " f"{self.prompt_logprobs}.") + if self.truncate_input_tokens is not None and self.truncate_input_tokens < 1: + raise ValueError(f"truncate_input_tokens must be >= 1, got {self.truncate_input_tokens}") def _verify_beam_search(self) -> None: if self.best_of == 1: @@ -276,4 +281,5 @@ def __repr__(self) -> str: f"prompt_logprobs={self.prompt_logprobs}, " f"skip_special_tokens={self.skip_special_tokens}, " "spaces_between_special_tokens=" - f"{self.spaces_between_special_tokens})") + f"{self.spaces_between_special_tokens}, " + f"truncate_input_tokens={self.truncate_input_tokens})") From 586b4a0f196b15a1b47cfd946c1ed1e37ebbe8bc Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 1 Mar 2024 14:28:31 +0000 Subject: [PATCH 02/10] Better docstring --- vllm/sampling_params.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 912fb51cd966..8df8f7a7409a 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -91,7 +91,8 @@ class SamplingParams: tokens in the output. Defaults to True. logits_processors: List of functions that modify logits based on previously generated tokens. - truncate_input_tokens: Truncate input tokens using left-truncation. 
+ truncate_input_tokens: If set to an integer k, will use only the last k + tokens from the prompt. Defaults to None (no truncation). """ def __init__( From 559e4410c0f974c641660e89cdb7c6fc6068f5b8 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 1 Mar 2024 15:03:52 +0000 Subject: [PATCH 03/10] Apply formatting --- vllm/entrypoints/openai/serving_completion.py | 10 ++++++++-- vllm/sampling_params.py | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index c3037d109667..aceb11a3782b 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -300,10 +300,16 @@ async def create_completion(self, request: CompletionRequest, for i, prompt in enumerate(prompts): if prompt_is_tokens: input_ids = self._validate_prompt_and_tokenize( - request, prompt_ids=prompt, truncate_input_tokens=sampling_params.truncate_input_tokens) + request, + prompt_ids=prompt, + truncate_input_tokens=sampling_params. + truncate_input_tokens) else: input_ids = self._validate_prompt_and_tokenize( - request, prompt=prompt, truncate_input_tokens=sampling_params.truncate_input_tokens) + request, + prompt=prompt, + truncate_input_tokens=sampling_params. + truncate_input_tokens) generators.append( self.engine.generate(None, diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 8df8f7a7409a..88110e50bd86 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -202,7 +202,9 @@ def _verify_args(self) -> None: raise ValueError(f"prompt_logprobs must be non-negative, got " f"{self.prompt_logprobs}.") if self.truncate_input_tokens is not None and self.truncate_input_tokens < 1: - raise ValueError(f"truncate_input_tokens must be >= 1, got {self.truncate_input_tokens}") + raise ValueError( + f"truncate_input_tokens must be >= 1, got {self.truncate_input_tokens}" + ) def _verify_beam_search(self) -> None: if self.best_of == 1: From 32bc1f8ed59c961e0ff6afdd6f26d2ba75456af9 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 1 Mar 2024 19:52:13 +0100 Subject: [PATCH 04/10] Update vllm/entrypoints/openai/serving_engine.py tokenizer_kwargs: more efficient allocation Co-authored-by: Nick Hill --- vllm/entrypoints/openai/serving_engine.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 64b244094adb..eee58ba2a7c7 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -157,10 +157,9 @@ def _validate_prompt_and_tokenize( "Only one of prompt or prompt_ids should be provided.") if prompt_ids is None: - tokenizer_kwargs = {} - if truncate_input_tokens is not None: - tokenizer_kwargs["truncation"] = True - tokenizer_kwargs["max_length"] = truncate_input_tokens + tokenizer_kwargs = {} if truncate_input_tokens is not None else { + "truncation": True, "max_length": truncate_input_tokens, + } input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids else: if truncate_input_tokens is not None: From 4b92849513d6910f189c2a0295ead7b928e67861 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 1 Mar 2024 18:57:11 +0000 Subject: [PATCH 05/10] Use truncate_prompt_tokens naming --- vllm/entrypoints/openai/protocol.py | 4 ++-- vllm/entrypoints/openai/serving_completion.py | 8 ++++---- vllm/entrypoints/openai/serving_engine.py | 14 +++++++------- vllm/sampling_params.py | 12 
++++++------ 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index bbb732c9ba70..51f9659c4c52 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -183,7 +183,7 @@ class CompletionRequest(BaseModel): guided_json: Optional[Union[str, dict, BaseModel]] = None guided_regex: Optional[str] = None guided_choice: Optional[List[str]] = None - truncate_input_tokens: Optional[int] = None + truncate_prompt_tokens: Optional[int] = None def to_sampling_params(self): echo_without_generation = self.echo and self.max_tokens == 0 @@ -226,7 +226,7 @@ def logit_bias_logits_processor( include_stop_str_in_output=self.include_stop_str_in_output, length_penalty=self.length_penalty, logits_processors=logits_processors, - truncate_input_tokens=self.truncate_input_tokens, + truncate_prompt_tokens=self.truncate_prompt_tokens, ) @model_validator(mode="before") diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index aceb11a3782b..e82e43d27fc5 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -302,14 +302,14 @@ async def create_completion(self, request: CompletionRequest, input_ids = self._validate_prompt_and_tokenize( request, prompt_ids=prompt, - truncate_input_tokens=sampling_params. - truncate_input_tokens) + truncate_prompt_tokens=sampling_params. + truncate_prompt_tokens) else: input_ids = self._validate_prompt_and_tokenize( request, prompt=prompt, - truncate_input_tokens=sampling_params. - truncate_input_tokens) + truncate_prompt_tokens=sampling_params. + truncate_prompt_tokens) generators.append( self.engine.generate(None, diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index eee58ba2a7c7..0647d4fe0dd1 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -149,7 +149,7 @@ def _validate_prompt_and_tokenize( request: Union[ChatCompletionRequest, CompletionRequest], prompt: Optional[str] = None, prompt_ids: Optional[List[int]] = None, - truncate_input_tokens: Optional[int] = None) -> List[int]: + truncate_prompt_tokens: Optional[int] = None) -> List[int]: if not (prompt or prompt_ids): raise ValueError("Either prompt or prompt_ids should be provided.") if (prompt and prompt_ids): @@ -157,15 +157,15 @@ def _validate_prompt_and_tokenize( "Only one of prompt or prompt_ids should be provided.") if prompt_ids is None: - tokenizer_kwargs = {} if truncate_input_tokens is not None else { - "truncation": True, "max_length": truncate_input_tokens, + tokenizer_kwargs = {} if truncate_prompt_tokens is not None else { + "truncation": True, + "max_length": truncate_prompt_tokens, } input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids + elif truncate_prompt_tokens is not None: + input_ids = prompt_ids[-truncate_prompt_tokens:] else: - if truncate_input_tokens is not None: - input_ids = prompt_ids[-truncate_input_tokens:] - else: - input_ids = prompt_ids + input_ids = prompt_ids token_num = len(input_ids) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 88110e50bd86..7870d417768a 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -91,7 +91,7 @@ class SamplingParams: tokens in the output. Defaults to True. logits_processors: List of functions that modify logits based on previously generated tokens. 
- truncate_input_tokens: If set to an integer k, will use only the last k + truncate_prompt_tokens: If set to an integer k, will use only the last k tokens from the prompt. Defaults to None (no truncation). """ @@ -120,7 +120,7 @@ def __init__( skip_special_tokens: bool = True, spaces_between_special_tokens: bool = True, logits_processors: Optional[List[LogitsProcessor]] = None, - truncate_input_tokens: Optional[int] = None, + truncate_prompt_tokens: Optional[int] = None, ) -> None: self.n = n self.best_of = best_of if best_of is not None else n @@ -153,7 +153,7 @@ def __init__( self.spaces_between_special_tokens = spaces_between_special_tokens self.logits_processors = logits_processors self.include_stop_str_in_output = include_stop_str_in_output - self.truncate_input_tokens = truncate_input_tokens + self.truncate_prompt_tokens = truncate_prompt_tokens self._verify_args() if self.use_beam_search: self._verify_beam_search() @@ -201,9 +201,9 @@ def _verify_args(self) -> None: if self.prompt_logprobs is not None and self.prompt_logprobs < 0: raise ValueError(f"prompt_logprobs must be non-negative, got " f"{self.prompt_logprobs}.") - if self.truncate_input_tokens is not None and self.truncate_input_tokens < 1: + if self.truncate_prompt_tokens is not None and self.truncate_prompt_tokens < 1: raise ValueError( - f"truncate_input_tokens must be >= 1, got {self.truncate_input_tokens}" + f"truncate_prompt_tokens must be >= 1, got {self.truncate_prompt_tokens}" ) def _verify_beam_search(self) -> None: @@ -285,4 +285,4 @@ def __repr__(self) -> str: f"skip_special_tokens={self.skip_special_tokens}, " "spaces_between_special_tokens=" f"{self.spaces_between_special_tokens}, " - f"truncate_input_tokens={self.truncate_input_tokens})") + f"truncate_prompt_tokens={self.truncate_prompt_tokens})") From b6554a81cfa56fa37ef79c6eb0cbf6f4c6d76379 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 1 Mar 2024 19:03:08 +0000 Subject: [PATCH 06/10] serving_engine.py: fix bug --- vllm/entrypoints/openai/serving_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 0647d4fe0dd1..7aca2a15fc2d 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -157,7 +157,7 @@ def _validate_prompt_and_tokenize( "Only one of prompt or prompt_ids should be provided.") if prompt_ids is None: - tokenizer_kwargs = {} if truncate_prompt_tokens is not None else { + tokenizer_kwargs = {} if truncate_prompt_tokens is None else { "truncation": True, "max_length": truncate_prompt_tokens, } From 6efc8d152b7d1bc5c79b9464b8f6038d88926c8c Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Tue, 5 Mar 2024 09:00:20 +0000 Subject: [PATCH 07/10] sampling_params: Docstring update --- vllm/sampling_params.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 7870d417768a..5a500e94725e 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -92,7 +92,8 @@ class SamplingParams: logits_processors: List of functions that modify logits based on previously generated tokens. truncate_prompt_tokens: If set to an integer k, will use only the last k - tokens from the prompt. Defaults to None (no truncation). + tokens from the prompt (i.e., left truncation). Defaults to None (i.e., + no truncation). 
""" def __init__( From 520f8bb9b0739097cd0e72ca65dd426a55defc68 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Tue, 26 Mar 2024 11:26:17 +0000 Subject: [PATCH 08/10] Use pydantic.conint --- vllm/entrypoints/openai/protocol.py | 4 ++-- vllm/entrypoints/openai/serving_engine.py | 4 +++- vllm/sampling_params.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index fbe721133ee9..ddd105c2012c 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -4,7 +4,7 @@ from typing import Dict, List, Literal, Optional, Union import torch -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, model_validator, conint from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid @@ -229,7 +229,7 @@ class CompletionRequest(BaseModel): min_tokens: Optional[int] = 0 skip_special_tokens: Optional[bool] = True spaces_between_special_tokens: Optional[bool] = True - truncate_prompt_tokens: Optional[int] = None + truncate_prompt_tokens: Optional[conint(ge=1)] = None # doc: end-completion-sampling-params # doc: begin-completion-extra-params diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 858439d47c2b..16b621419351 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -4,6 +4,8 @@ from http import HTTPStatus from typing import Dict, List, Optional, Union +from pydantic import conint + from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, CompletionRequest, ErrorResponse, @@ -174,7 +176,7 @@ def _validate_prompt_and_tokenize( request: Union[ChatCompletionRequest, CompletionRequest], prompt: Optional[str] = None, prompt_ids: Optional[List[int]] = None, - truncate_prompt_tokens: Optional[int] = None) -> List[int]: + truncate_prompt_tokens: Optional[conint(ge=1)] = None) -> List[int]: if not (prompt or prompt_ids): raise ValueError("Either prompt or prompt_ids should be provided.") if (prompt and prompt_ids): diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 6aa8a942fcea..510489c5e42f 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -5,6 +5,7 @@ from typing import Callable, List, Optional, Union import torch +from pydantic import conint _SAMPLING_EPS = 1e-5 @@ -124,7 +125,7 @@ def __init__( skip_special_tokens: bool = True, spaces_between_special_tokens: bool = True, logits_processors: Optional[List[LogitsProcessor]] = None, - truncate_prompt_tokens: Optional[int] = None, + truncate_prompt_tokens: Optional[conint(ge=1)] = None, ) -> None: self.n = n self.best_of = best_of if best_of is not None else n From 08b3e191667a56c91967cd42d1b4c660ca193ba5 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Tue, 26 Mar 2024 11:39:05 +0000 Subject: [PATCH 09/10] fix formatting --- vllm/entrypoints/openai/serving_engine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 16b621419351..ed7facd85c6d 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -176,7 +176,8 @@ def _validate_prompt_and_tokenize( request: Union[ChatCompletionRequest, CompletionRequest], prompt: Optional[str] = None, prompt_ids: Optional[List[int]] = None, - truncate_prompt_tokens: 
Optional[conint(ge=1)] = None) -> List[int]: + truncate_prompt_tokens: Optional[conint(ge=1)] = None + ) -> List[int]: if not (prompt or prompt_ids): raise ValueError("Either prompt or prompt_ids should be provided.") if (prompt and prompt_ids): From c8f3429b2649bff42036e221bb8ca9e97abe40b4 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Tue, 26 Mar 2024 11:43:36 +0000 Subject: [PATCH 10/10] Fix formatting --- vllm/entrypoints/openai/protocol.py | 2 +- vllm/sampling_params.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index ddd105c2012c..e9ab10490f96 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -4,7 +4,7 @@ from typing import Dict, List, Literal, Optional, Union import torch -from pydantic import BaseModel, Field, model_validator, conint +from pydantic import BaseModel, Field, conint, model_validator from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 510489c5e42f..51f34ce03203 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -95,8 +95,8 @@ class SamplingParams: logits_processors: List of functions that modify logits based on previously generated tokens. truncate_prompt_tokens: If set to an integer k, will use only the last k - tokens from the prompt (i.e., left truncation). Defaults to None (i.e., - no truncation). + tokens from the prompt (i.e., left truncation). Defaults to None + (i.e., no truncation). """ def __init__( @@ -216,10 +216,10 @@ def _verify_args(self) -> None: if self.prompt_logprobs is not None and self.prompt_logprobs < 0: raise ValueError(f"prompt_logprobs must be non-negative, got " f"{self.prompt_logprobs}.") - if self.truncate_prompt_tokens is not None and self.truncate_prompt_tokens < 1: - raise ValueError( - f"truncate_prompt_tokens must be >= 1, got {self.truncate_prompt_tokens}" - ) + if (self.truncate_prompt_tokens is not None + and self.truncate_prompt_tokens < 1): + raise ValueError(f"truncate_prompt_tokens must be >= 1, " + f"got {self.truncate_prompt_tokens}") def _verify_beam_search(self) -> None: if self.best_of == 1:
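
Taken together, the series exposes `truncate_prompt_tokens` as a per-request field on the OpenAI-compatible completions endpoint, validated as an integer >= 1 via `conint(ge=1)`. The sketch below is a hypothetical client call, not part of the patches: the server URL, model name, and prompt are placeholders, and it simply posts the new field in a standard completions request body.

```python
# Hypothetical usage sketch -- assumes a vLLM OpenAI-compatible server with
# this patch series applied is listening on localhost:8000; the model name
# and prompt are placeholders.
import requests

payload = {
    "model": "facebook/opt-125m",
    "prompt": "a very long prompt that may exceed the context window ...",
    "max_tokens": 16,
    # New field from this series: keep only the last 32 prompt tokens
    # (left truncation) before generation. Values < 1 are rejected by the
    # conint(ge=1) constraint.
    "truncate_prompt_tokens": 32,
}

resp = requests.post("http://localhost:8000/v1/completions", json=payload)
print(resp.json()["choices"][0]["text"])
```

Requests that omit the field behave as before, since the default remains `None` (no truncation).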
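
For string prompts, the truncation itself is delegated to the Hugging Face tokenizer: the serving engine constructs it with `truncation_side="left"` and calls it with `truncation=True, max_length=truncate_prompt_tokens`, while pre-tokenized prompts are sliced with `prompt_ids[-truncate_prompt_tokens:]`. The standalone sketch below (using `gpt2` purely as an example checkpoint, which the patches do not prescribe) illustrates that the two paths keep the same tokens.

```python
# Standalone illustration of left-truncation with a Hugging Face tokenizer;
# "gpt2" is only an example checkpoint, not part of the patch series.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2", truncation_side="left")

prompt = "one two three four five six seven eight"
full_ids = tok(prompt).input_ids
kept_ids = tok(prompt, truncation=True, max_length=4).input_ids

# With truncation_side="left" only the last 4 tokens survive, matching the
# prompt_ids[-4:] slice the server applies when token IDs are passed in.
assert kept_ids == full_ids[-4:]
print(full_ids, "->", kept_ids)
```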