diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 21cbd9ba1378..4edd1cadfb2f 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -62,7 +62,6 @@ steps:
   mirror_hardwares: [amd]
 
   commands:
-  - pytest -v -s test_inputs.py
   - pytest -v -s entrypoints -m llm
   - pytest -v -s entrypoints -m openai
 
@@ -79,6 +78,13 @@ steps:
     - python3 llava_example.py
     - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
 
+- label: Inputs Test
+  #mirror_hardwares: [amd]
+  commands:
+    - bash ../.buildkite/download-images.sh
+    - pytest -v -s test_inputs.py
+    - pytest -v -s multimodal
+
 - label: Kernels Test %N
   #mirror_hardwares: [amd]
   command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
diff --git a/tests/multimodal/test_processor.py b/tests/multimodal/test_processor.py
index 4aeae633d07f..3df28e782dd8 100644
--- a/tests/multimodal/test_processor.py
+++ b/tests/multimodal/test_processor.py
@@ -6,8 +6,10 @@
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.image import ImagePixelData
 
+from ..conftest import _STR_DTYPE_TO_TORCH_DTYPE
 
-@pytest.mark.parametrize("dtype", ["half", "bfloat16", "float"])
+
+@pytest.mark.parametrize("dtype", ["half", "float"])
 def test_clip_image_processor(hf_images, dtype):
     MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
     IMAGE_HEIGHT = IMAGE_WIDTH = 33
@@ -36,8 +38,8 @@ def test_clip_image_processor(hf_images, dtype):
     for image in hf_images:
         hf_result = hf_processor.preprocess(
             image,
-            return_tensors="np",
-        )
+            return_tensors="pt",
+        ).to(dtype=_STR_DTYPE_TO_TORCH_DTYPE[dtype])
         vllm_result = MULTIMODAL_REGISTRY.process_input(
             ImagePixelData(image),
             model_config=model_config,
@@ -45,7 +47,8 @@ def test_clip_image_processor(hf_images, dtype):
         )
 
         assert hf_result.keys() == vllm_result.keys()
-        for key, hf_arr in hf_result.items():
+        for key, hf_tensor in hf_result.items():
+            hf_arr: np.ndarray = hf_tensor.numpy()
             vllm_arr: np.ndarray = vllm_result[key].numpy()
 
             assert hf_arr.shape == vllm_arr.shape, f"Failed for key={key}"