This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit ef17c9b

[BugFix] Ensure worker model loop is always stopped at the right time

Authored by njhill, committed by robertgshaw2-neuralmagic on Jul 7, 2024
1 parent: 3555aff
Showing 1 changed file with 1 addition and 1 deletion.
vllm/engine/llm_engine.py (1 addition, 1 deletion)

```diff
@@ -840,7 +840,7 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
         # Tracing
         self.do_tracing(scheduler_outputs)

-        if not request_outputs:
+        if not self.has_unfinished_requests():
             # Stop the execute model loop in parallel workers until there are
             # more requests to process. This avoids waiting indefinitely in
             # torch.distributed ops which may otherwise timeout, and unblocks
```
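The one-line change matters because the two conditions can disagree on the final step: a step that returns the last finished request's output has a non-empty `request_outputs`, yet there are no unfinished requests left, so the old check would never stop the parallel workers' execute-model loop on that step. A minimal sketch of the divergence, using a hypothetical toy engine (not vLLM's actual classes):

```python
# Toy model of the engine step loop. All names here are illustrative
# stand-ins, not vLLM's real API.
class ToyEngine:
    def __init__(self, requests):
        self.unfinished = list(requests)  # request ids still in flight

    def has_unfinished_requests(self):
        return len(self.unfinished) > 0

    def step(self):
        # The step that finishes a request RETURNS its output, so on the
        # final step request_outputs is non-empty while the queue is empty.
        request_outputs = []
        if self.unfinished:
            done = self.unfinished.pop(0)
            request_outputs.append(f"output-for-{done}")
        # Old condition: stop workers only when this step produced no outputs.
        old_would_stop = not request_outputs
        # New condition: stop workers when no requests remain in flight.
        new_would_stop = not self.has_unfinished_requests()
        return request_outputs, old_would_stop, new_would_stop

engine = ToyEngine(["req-1"])
outputs, old_stop, new_stop = engine.step()
# Final step: an output was produced and the queue is now empty. The old
# condition would leave the worker loop running (risking an indefinite wait
# in a torch.distributed op); the new condition stops it at the right time.
```

In this toy run `outputs` is non-empty, so `old_stop` is False while `new_stop` is True, which is the mismatch the commit fixes.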
