diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py index 4872929ec36cc..e55a0ce137ed6 100644 --- a/vllm/model_executor/models/phi3v.py +++ b/vllm/model_executor/models/phi3v.py @@ -422,7 +422,9 @@ def input_processor_for_phi3v(ctx: InputContext, llm_inputs: LLMInputs): prompt = llm_inputs.get("prompt") if prompt is None: - image_idx = [] + # For async server requests, we assume the prompt and its token_ids are + # always well-formed, so num_image_tags == len(image_data) always holds. + image_idx = range(1, len(image_data) + 1) new_prompt = None else: image_idx = sorted(map(int, re.findall(r"<\|image_(\d+)\|>+", prompt)))