Skip to content

Commit

Permalink
small fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
alexm-neuralmagic committed Aug 27, 2024
1 parent dd38054 commit d312acb
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 5 deletions.
5 changes: 1 addition & 4 deletions examples/offline_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM.
-llm = LLM(model="facebook/opt-125m",
-          num_scheduler_steps=8,
-          use_v2_block_manager=True,
-          disable_async_output_proc=False)
+llm = LLM(model="facebook/opt-125m")
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
Expand Down
1 change: 0 additions & 1 deletion vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1105,7 +1105,6 @@ def schedule(
if not self.cache_config.enable_prefix_caching:
common_computed_block_nums = []

-# TODO: Combine multi-step and async postprocessor
allow_async_output_proc: bool = self.use_async_output_proc

# Create input data structures.
Expand Down

0 comments on commit d312acb

Please sign in to comment.