
Commit f0eca4e

ywang96 authored and prashantgupta24 committed
[Misc] Add example for LLaVA-NeXT (vllm-project#5879)
1 parent 3be2439 commit f0eca4e

1 file changed: +38 −0 lines changed

examples/llava_next_example.py

from io import BytesIO

import requests
from PIL import Image

from vllm import LLM, SamplingParams
from vllm.multimodal.image import ImagePixelData

# Dynamic image input is currently not supported and therefore
# a fixed image input shape and its corresponding feature size is required.
# See https://github.com/vllm-project/vllm/pull/4199 for the complete
# configuration matrix.

llm = LLM(
    model="llava-hf/llava-v1.6-mistral-7b-hf",
    image_input_type="pixel_values",
    image_token_id=32000,
    image_input_shape="1,3,336,336",
    image_feature_size=1176,
)

prompt = "[INST] " + "<image>" * 1176 + "\nWhat is shown in this image? [/INST]"
url = "https://h2o-release.s3.amazonaws.com/h2ogpt/bigben.jpg"
image = Image.open(BytesIO(requests.get(url).content))
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=100)

outputs = llm.generate(
    {
        "prompt": prompt,
        "multi_modal_data": ImagePixelData(image),
    },
    sampling_params=sampling_params)

generated_text = ""
for o in outputs:
    generated_text += o.outputs[0].text

print(f"LLM output:{generated_text}")
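
One detail worth noting in the example above: the prompt embeds one "<image>" placeholder per image feature, so the placeholder count (1176 here) has to stay in sync with the image_feature_size configured for image_input_shape="1,3,336,336". Below is a minimal sketch of how the prompt could be built from a single constant so the two cannot drift apart; the build_prompt helper is hypothetical and not part of this commit.

# Hypothetical helper (not part of this commit): derive the "<image>"
# placeholder count from the same constant used for image_feature_size,
# keeping the prompt and the engine configuration consistent.
IMAGE_FEATURE_SIZE = 1176  # matches image_input_shape="1,3,336,336" above


def build_prompt(question: str, feature_size: int = IMAGE_FEATURE_SIZE) -> str:
    # LLaVA-NeXT (Mistral) instruction format: [INST] ... [/INST]
    return "[INST] " + "<image>" * feature_size + "\n" + question + " [/INST]"


prompt = build_prompt("What is shown in this image?")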
