From e2e0e5b3479782fdcb0115d90f776b4efc86afb8 Mon Sep 17 00:00:00 2001
From: "lei.yul"
Date: Fri, 6 Sep 2024 17:52:13 +0800
Subject: [PATCH] [BugFix] Fix metrics error for --num-scheduler-steps > 1

---
 vllm/engine/async_llm_engine.py | 5 ++---
 vllm/engine/llm_engine.py       | 2 ++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 7fe8053fffb7b..bdf7e475af11b 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -355,9 +355,8 @@ async def step_async(
             output = []
 
         # Finish the current step for all the sequence groups.
-        if self.scheduler_config.is_multi_step:
-            for seq_group in seq_group_metadata_list:
-                seq_group.finish_step()
+        for seq_group in seq_group_metadata_list:
+            seq_group.finish_step()
 
         if not self._has_remaining_steps(seq_group_metadata_list):
             # Clear the cache if we have finished all the steps
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 50dcb6937eb6f..1934c6905790b 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1810,6 +1810,8 @@ def _get_stats(self,
                     # TPOTs.
                     latency = seq_group.get_last_latency(now)
                     time_per_output_tokens_iter.append(latency)
+                    actual_num_batched_tokens += \
+                        seq_group.state.current_step - 1
 
                 # Because of chunked prefill, we can have a single sequence
                 # group that does multiple prompt_runs. To prevent logging
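
Note (context, not part of the patch): a minimal Python sketch of the token-accounting
error the llm_engine.py hunk addresses. The names NUM_SCHEDULER_STEPS and
NUM_DECODE_SEQS are illustrative placeholders, not vLLM identifiers; only
seq_group.state.current_step comes from the patch itself. With multi-step
scheduling, the stats counter sees one batched token per decoding sequence per
engine step, while each sequence actually generated one token per scheduler
micro-step, so per-token metrics undercount unless the missing
current_step - 1 tokens are added back for each sequence group:

    # Illustrative sketch only: decode-token accounting with multi-step
    # scheduling, assuming every group completed all of its micro-steps.
    NUM_SCHEDULER_STEPS = 4  # hypothetical --num-scheduler-steps value
    NUM_DECODE_SEQS = 8      # hypothetical decoding sequence groups in a batch

    # The raw counter records one token per decoding sequence per engine step:
    actual_num_batched_tokens = NUM_DECODE_SEQS

    # The patch adds back (current_step - 1) tokens per sequence group:
    for _ in range(NUM_DECODE_SEQS):
        current_step = NUM_SCHEDULER_STEPS  # stands in for seq_group.state.current_step
        actual_num_batched_tokens += current_step - 1

    # The corrected counter now matches the tokens actually generated.
    assert actual_num_batched_tokens == NUM_DECODE_SEQS * NUM_SCHEDULER_STEPS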