Skip to content

Commit

Permalink
Fix dummy data seq padding for multimodal qwen
Browse files: browse the repository at this point in the history
Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com>
  • Loading branch information
alex-jw-brooks committed Sep 5, 2024
1 parent 4f25926 commit 2ac3008
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions vllm/model_executor/models/qwen.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -830,9 +830,7 @@ def dummy_data_for_qwen(
# Build the image prompts with no imgpads; the tokenizer will add img pads
image_prompt = ''.join(
[get_image_text(idx, False) for idx in range(1, num_images + 1)])
toks = tokenizer.encode(image_prompt,
add_special_tokens=False,
return_tensors="pt")[0].tolist()
toks = tokenizer.encode(image_prompt, add_special_tokens=False)

# Make sure we actually get the fixed context size per tok padding
num_pads = toks.count(tokenizer.encode(IMG_PAD)[0])
Expand All @@ -842,6 +840,10 @@ def dummy_data_for_qwen(
f" per image, but got {num_pads} pads for {num_images} image(s)"
" in total. Are you using a qwen tokenizer?")

# Ensure the number of tokens is at minimum the sequence length provided
if len(toks) < seq_len:
toks += [0] * (seq_len - len(toks))

# Build the input images; width/height doesn't actually matter here since
# the data will get resized and the # of tokens per image is constant
image = Image.new("RGB", (224, 224), color=0)
Expand Down

0 comments on commit 2ac3008

Please sign in to comment.