Skip to content

Commit

Permalink
review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
alexm-neuralmagic committed Aug 29, 2024
1 parent e26b18b commit 942bc12
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
6 changes: 6 additions & 0 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,12 +1256,18 @@ def _process_model_outputs(self,
"""Apply the model output to the sequences in the scheduled seq groups.
virtual_engine: The engine id to operate on
is_async: Indicates whether this postprocessor runs in
parallel with the GPU forward pass and is processing
tokens from the previous step. If this is true, then
no tokens need to be appended since it is already done
externally (before the next schedule() call)
sampler_output: Used with multi-step execution to provide the
sampler output of each step
is_last_output: Used with multi-step execution to indicate
the last step (of each multi-step group)
Returns RequestOutputs that can be returned to the client.
"""
now = time.time()
Expand Down
10 changes: 6 additions & 4 deletions vllm/worker/multi_step_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,12 @@ def execute_model(

# Pythonize the output and block if needed since it is the last step
if model_input.is_last_step:
outputs = self._final_process_outputs(
model_input,
model_input.frozen_model_input.async_callback. # type: ignore
keywords["output_proc_callback"]) # type: ignore
assert model_input.frozen_model_input is not None
async_callback = model_input.frozen_model_input.async_callback # type: ignore
output_proc_callback = async_callback.keywords[
"output_proc_callback"] if async_callback is not None else None
outputs = self._final_process_outputs(model_input,
output_proc_callback)
return outputs

# should be [SamplerOutput]
Expand Down

0 comments on commit 942bc12

Please sign in to comment.