diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index b707d87c3af83..873decff37c1e 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -1554,14 +1554,14 @@ def _get_num_new_tokens(self, seq_group: SequenceGroup,
             # the number of new tokens that is dividable by the block size
             # to avoid partial block matching.
             block_size = self.cache_config.block_size
-            reminder = budget.token_budget % block_size
-            if reminder != 0:
+            remainder = budget.token_budget % block_size
+            if remainder != 0:
                 raise ValueError("When enabling chunked prefill and "
                                  "prefix caching, max_num_batched_tokens "
                                  "(chunk size) must be dividable by "
                                  "block size, but got chunk_size "
                                  f"({budget.token_budget}) % block_size "
-                                 f"({block_size}) = {reminder}")
+                                 f"({block_size}) = {remainder}")
             if remaining_token_budget < num_new_tokens:
                 num_new_tokens = (remaining_token_budget //
                                   block_size) * block_size
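
For illustration only (not part of the patch), here is a minimal standalone sketch of the block-alignment rule this hunk enforces: the configured chunk size must be divisible by the block size, and when the remaining budget cannot fit the whole prefill, the chunk is rounded down to a whole number of blocks. The helper name `chunk_new_tokens` and the example values are hypothetical; only the arithmetic mirrors the diff.

```python
# Hypothetical sketch of the rounding logic in the hunk above; not vLLM code.
def chunk_new_tokens(num_new_tokens: int, remaining_token_budget: int,
                     token_budget: int, block_size: int) -> int:
    # The overall chunk size must be a multiple of the block size,
    # otherwise partial-block prefix matching could occur.
    remainder = token_budget % block_size
    if remainder != 0:
        raise ValueError(f"chunk_size ({token_budget}) % block_size "
                         f"({block_size}) = {remainder}")
    # Round the scheduled tokens down to a whole number of blocks
    # when the remaining budget cannot cover the full prefill.
    if remaining_token_budget < num_new_tokens:
        num_new_tokens = (remaining_token_budget // block_size) * block_size
    return num_new_tokens

# Example: 2048-token chunk size, 16-token blocks, 100 tokens of budget
# left -> the 500-token prefill is chunked to 96 tokens (6 full blocks).
assert chunk_new_tokens(500, 100, 2048, 16) == 96
```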