diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 98b55032cd1b..61f855d0e8e4 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -871,6 +871,9 @@ def create_engine_config(self, ) -> EngineConfig: if self.enable_chunked_prefill: raise ValueError("Chunked prefill is not supported with " "multi-step (--num-scheduler-steps > 1)") + if not self.use_v2_block_manager: + raise ValueError("BlockSpaceManagerV2 is required for " + "multi-step (--num-scheduler-steps > 1)") # make sure num_lookahead_slots is set the higher value depending on # if we are using speculative decoding or multi-step diff --git a/vllm/worker/multi_step_model_runner.py b/vllm/worker/multi_step_model_runner.py index bb48f659989c..21e90757d302 100644 --- a/vllm/worker/multi_step_model_runner.py +++ b/vllm/worker/multi_step_model_runner.py @@ -497,8 +497,9 @@ def _pythonize_sampler_output( next_token_ids = sample_result parent_ids = [0] seq_outputs: List[SequenceOutput] = [] - assert len(seq_group.sampling_params.logits_processors) == 0, ( - "Logits Processors are not supported in multi-step decoding") + if seq_group.sampling_params.logits_processors: + assert len(seq_group.sampling_params.logits_processors) == 0, ( + "Logits Processors are not supported in multi-step decoding") for parent_id, next_token_id in zip(parent_ids, next_token_ids): # TODO(will): support logprobs # Hard coded logprob