From c293b3a1b656dce66d28ce970ae342eed64876cf Mon Sep 17 00:00:00 2001
From: Matthew Wong
Date: Mon, 24 Jun 2024 18:46:54 +0000
Subject: [PATCH] Address reviewer comments

Revert "Skip xfail tests on ROCm to conserve CI resources"

This reverts commit 01fa95f7862ea52b19d96c16a5e1f7752cff3577.
---
 Dockerfile.rocm                    | 23 ++++++++++++-----------
 tests/models/test_llava_next.py    |  3 ---
 tests/models/test_phi3v.py         |  4 +---
 tests/multimodal/test_processor.py |  5 -----
 vllm/worker/worker_base.py         |  8 +++++++-
 5 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
index a981705997b2..652f04adf895 100644
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -51,7 +51,7 @@ WORKDIR ${APP_MOUNT}
 RUN pip install --upgrade pip
 # Remove sccache so it doesn't interfere with ccache
 # TODO: implement sccache support across components
-RUN apt-get purge -y sccache; pip uninstall -y sccache && rm -rf "$(which sccache)"
+RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
 # Install torch == 2.4.0 on ROCm
 RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
     *"rocm-5.7"*) \
@@ -79,16 +79,17 @@ ENV CCACHE_DIR=/root/.cache/ccache
 
 ### AMD-SMI build stage
 FROM base AS build_amdsmi
+# Build amdsmi wheel always
 RUN cd /opt/rocm/share/amd_smi \
     && pip wheel . --wheel-dir=/install
 
 
-### Flash-Attention build stage
+### Flash-Attention wheel build stage
 FROM base AS build_fa
 ARG BUILD_FA
 ARG FA_GFX_ARCHS
 ARG FA_BRANCH
-# Build ROCm flash-attention
+# Build ROCm flash-attention wheel if `BUILD_FA = 1`
 RUN --mount=type=cache,target=${CCACHE_DIR} \
     if [ "$BUILD_FA" = "1" ]; then \
     mkdir -p libs \
@@ -108,11 +109,11 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
     fi
 
 
-### Triton build stage
+### Triton wheel build stage
 FROM base AS build_triton
 ARG BUILD_TRITON
 ARG TRITON_BRANCH
-# Build triton
+# Build triton wheel if `BUILD_TRITON = 1`
 RUN --mount=type=cache,target=${CCACHE_DIR} \
     if [ "$BUILD_TRITON" = "1" ]; then \
     mkdir -p libs \
@@ -158,22 +159,22 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
         patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h rocm_patch/rocm_bf16.patch;; \
     *"rocm-6.1"*) \
         # Bring in upgrades to HIP graph earlier than ROCm 6.2 for vLLM
-        wget -N https://github.com/ROCm/vllm/raw/main/rocm_patch/libamdhip64.so.6 -P rocm_patch \
+        wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P rocm_patch \
         && cp rocm_patch/libamdhip64.so.6 /opt/rocm/lib/libamdhip64.so.6 \
         # Prevent interference if torch bundles its own HIP runtime
-        && rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so || true;; \
+        && rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true;; \
     *) ;; esac \
     && python3 setup.py clean --all \
     && python3 setup.py develop
 
-# Copy amdsmi wheel(s)
+# Copy amdsmi wheel into final image
 RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
     mkdir -p libs \
     && cp /install/*.whl libs \
     # Preemptively uninstall to avoid same-version no-installs
     && pip uninstall -y amdsmi;
 
-# Copy triton wheel(s) if any
+# Copy triton wheel(s) into final image if they were built
 RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
     mkdir -p libs \
     && if ls /install/*.whl; then \
@@ -181,7 +182,7 @@ RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
     # Preemptively uninstall to avoid same-version no-installs
     && pip uninstall -y triton; fi
 
-# Copy flash-attn wheel(s) if any
+# Copy flash-attn wheel(s) into final image if they were built
 RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
     mkdir -p libs \
     && if ls /install/*.whl; then \
@@ -189,7 +190,7 @@ RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
     # Preemptively uninstall to avoid same-version no-installs
     && pip uninstall -y flash-attn; fi
 
-# Install any dependencies that were built
+# Install wheels that were built to the final image
 RUN --mount=type=cache,target=/root/.cache/pip \
     if ls libs/*.whl; then \
     pip install libs/*.whl; fi
diff --git a/tests/models/test_llava_next.py b/tests/models/test_llava_next.py
index 94079ef08b9d..0eca5cb5330c 100644
--- a/tests/models/test_llava_next.py
+++ b/tests/models/test_llava_next.py
@@ -4,7 +4,6 @@
 from transformers import AutoTokenizer
 
 from vllm.config import VisionLanguageConfig
-from vllm.utils import is_hip
 
 from ..conftest import IMAGE_FILES
 
@@ -73,8 +72,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str],
     return hf_input_ids, hf_output_str
 
 
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Inconsistent image processor being used due to lack "
     "of support for dynamic image token replacement")
diff --git a/tests/models/test_phi3v.py b/tests/models/test_phi3v.py
index 2e0a2740d65f..a29d50df4c4e 100644
--- a/tests/models/test_phi3v.py
+++ b/tests/models/test_phi3v.py
@@ -4,7 +4,7 @@
 from transformers import AutoTokenizer
 
 from vllm.config import VisionLanguageConfig
-from vllm.utils import is_cpu, is_hip
+from vllm.utils import is_cpu
 
 from ..conftest import IMAGE_FILES
 
@@ -76,8 +76,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str],
 # TODO: Add test for `tensor_parallel_size` [ref: PR #3883]
 # Since we use _attn_implementation="eager" for hf_runner, here is
 # numeric difference for longer context and test can't pass
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Inconsistent image processor being used due to lack "
     "of support for dynamic image token replacement")
diff --git a/tests/multimodal/test_processor.py b/tests/multimodal/test_processor.py
index 91ceb7e81bcc..51c352361702 100644
--- a/tests/multimodal/test_processor.py
+++ b/tests/multimodal/test_processor.py
@@ -5,7 +5,6 @@
 from vllm.config import ModelConfig, VisionLanguageConfig
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.image import ImagePixelData
-from vllm.utils import is_hip
 
 from ..conftest import _STR_DTYPE_TO_TORCH_DTYPE
 
@@ -56,8 +55,6 @@ def test_clip_image_processor(hf_images, dtype):
     assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"
 
 
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Inconsistent image processor being used due to lack "
     "of support for dynamic image token replacement")
@@ -107,8 +104,6 @@ def test_llava_next_image_processor(hf_images, dtype):
     assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"
 
 
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Example image pixels were not processed using HuggingFace")
 @pytest.mark.parametrize("dtype", ["float"])
diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py
index 170c741e3e6c..99482aa93bc5 100644
--- a/vllm/worker/worker_base.py
+++ b/vllm/worker/worker_base.py
@@ -126,7 +126,13 @@ def update_environment_variables(envs: Dict[str, str]) -> None:
             # suppress the warning in `update_environment_variables`
             del os.environ[key]
             if is_hip():
-                os.environ.pop("HIP_VISIBLE_DEVICES", None)
+                hip_env_var = "HIP_VISIBLE_DEVICES"
+                if hip_env_var in os.environ:
+                    logger.warning(
+                        "Ignoring pre-set environment variable `%s=%s` as "
+                        "%s has also been set, which takes precedence.",
+                        hip_env_var, os.environ[hip_env_var], key)
+                os.environ.pop(hip_env_var, None)
         update_environment_variables(envs)
 
     def init_worker(self, *args, **kwargs):
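The worker_base.py hunk above makes the CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES precedence explicit. Below is a minimal standalone sketch of that behavior, outside the patch; the is_hip() stub and the apply_visible_devices() wrapper are illustrative assumptions, not vLLM's actual API.

# Illustrative sketch only -- mirrors the precedence logic added in the
# worker_base.py hunk; not part of the patch itself.
import logging
import os

logger = logging.getLogger(__name__)


def is_hip() -> bool:
    # Stand-in for vllm.utils.is_hip(); assume ROCm when ROCM_PATH is set.
    return "ROCM_PATH" in os.environ


def apply_visible_devices(envs: dict) -> None:
    key = "CUDA_VISIBLE_DEVICES"
    if key in envs and key in os.environ:
        # The incoming value is meant to win; drop the pre-set one.
        del os.environ[key]
        if is_hip():
            hip_env_var = "HIP_VISIBLE_DEVICES"
            if hip_env_var in os.environ:
                # Warn instead of silently discarding the user's setting.
                logger.warning(
                    "Ignoring pre-set environment variable `%s=%s` as "
                    "%s has also been set, which takes precedence.",
                    hip_env_var, os.environ[hip_env_var], key)
            os.environ.pop(hip_env_var, None)
    os.environ.update(envs)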