From c293b3a1b656dce66d28ce970ae342eed64876cf Mon Sep 17 00:00:00 2001
From: Matthew Wong
Date: Mon, 24 Jun 2024 18:46:54 +0000
Subject: [PATCH] Address reviewer comments

Revert "Skip xfail tests on ROCm to conserve CI resources"

This reverts commit 01fa95f7862ea52b19d96c16a5e1f7752cff3577.
---
 Dockerfile.rocm                    | 23 ++++++++++++-----------
 tests/models/test_llava_next.py    |  3 ---
 tests/models/test_phi3v.py         |  4 +---
 tests/multimodal/test_processor.py |  5 -----
 vllm/worker/worker_base.py         |  8 +++++++-
 5 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
index a981705997b2..652f04adf895 100644
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -51,7 +51,7 @@ WORKDIR ${APP_MOUNT}
 RUN pip install --upgrade pip
 # Remove sccache so it doesn't interfere with ccache
 # TODO: implement sccache support across components
-RUN apt-get purge -y sccache; pip uninstall -y sccache && rm -rf "$(which sccache)"
+RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
 # Install torch == 2.4.0 on ROCm
 RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
     *"rocm-5.7"*) \
@@ -79,16 +79,17 @@ ENV CCACHE_DIR=/root/.cache/ccache
 
 ### AMD-SMI build stage
 FROM base AS build_amdsmi
+# Build amdsmi wheel always
 RUN cd /opt/rocm/share/amd_smi \
     && pip wheel . --wheel-dir=/install
 
 
-### Flash-Attention build stage
+### Flash-Attention wheel build stage
 FROM base AS build_fa
 ARG BUILD_FA
 ARG FA_GFX_ARCHS
 ARG FA_BRANCH
-# Build ROCm flash-attention
+# Build ROCm flash-attention wheel if `BUILD_FA = 1`
 RUN --mount=type=cache,target=${CCACHE_DIR} \
     if [ "$BUILD_FA" = "1" ]; then \
     mkdir -p libs \
@@ -108,11 +109,11 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
     fi
 
 
-### Triton build stage
+### Triton wheel build stage
 FROM base AS build_triton
 ARG BUILD_TRITON
 ARG TRITON_BRANCH
-# Build triton
+# Build triton wheel if `BUILD_TRITON = 1`
 RUN --mount=type=cache,target=${CCACHE_DIR} \
     if [ "$BUILD_TRITON" = "1" ]; then \
     mkdir -p libs \
@@ -158,22 +159,22 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
         patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h rocm_patch/rocm_bf16.patch;; \
     *"rocm-6.1"*) \
         # Bring in upgrades to HIP graph earlier than ROCm 6.2 for vLLM
-        wget -N https://github.com/ROCm/vllm/raw/main/rocm_patch/libamdhip64.so.6 -P rocm_patch \
+        wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P rocm_patch \
         && cp rocm_patch/libamdhip64.so.6 /opt/rocm/lib/libamdhip64.so.6 \
         # Prevent interference if torch bundles its own HIP runtime
-        && rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so || true;; \
+        && rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true;; \
     *) ;; esac \
     && python3 setup.py clean --all \
     && python3 setup.py develop
 
-# Copy amdsmi wheel(s)
+# Copy amdsmi wheel into final image
 RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
     mkdir -p libs \
     && cp /install/*.whl libs \
     # Preemptively uninstall to avoid same-version no-installs
     && pip uninstall -y amdsmi;
 
-# Copy triton wheel(s) if any
+# Copy triton wheel(s) into final image if they were built
 RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
     mkdir -p libs \
     && if ls /install/*.whl; then \
@@ -181,7 +182,7 @@ RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
     # Preemptively uninstall to avoid same-version no-installs
     && pip uninstall -y triton; fi
 
-# Copy flash-attn wheel(s) if any
+# Copy flash-attn wheel(s) into final image if they were built
 RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
     mkdir -p libs \
     && if ls /install/*.whl; then \
@@ -189,7 +190,7 @@ RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
     # Preemptively uninstall to avoid same-version no-installs
     && pip uninstall -y flash-attn; fi
 
-# Install any dependencies that were built
+# Install wheels that were built to the final image
 RUN --mount=type=cache,target=/root/.cache/pip \
     if ls libs/*.whl; then \
     pip install libs/*.whl; fi
diff --git a/tests/models/test_llava_next.py b/tests/models/test_llava_next.py
index 94079ef08b9d..0eca5cb5330c 100644
--- a/tests/models/test_llava_next.py
+++ b/tests/models/test_llava_next.py
@@ -4,7 +4,6 @@
 from transformers import AutoTokenizer
 
 from vllm.config import VisionLanguageConfig
-from vllm.utils import is_hip
 
 from ..conftest import IMAGE_FILES
 
@@ -73,8 +72,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str],
     return hf_input_ids, hf_output_str
 
 
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Inconsistent image processor being used due to lack "
     "of support for dynamic image token replacement")
diff --git a/tests/models/test_phi3v.py b/tests/models/test_phi3v.py
index 2e0a2740d65f..a29d50df4c4e 100644
--- a/tests/models/test_phi3v.py
+++ b/tests/models/test_phi3v.py
@@ -4,7 +4,7 @@
 from transformers import AutoTokenizer
 
 from vllm.config import VisionLanguageConfig
-from vllm.utils import is_cpu, is_hip
+from vllm.utils import is_cpu
 
 from ..conftest import IMAGE_FILES
 
@@ -76,8 +76,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str],
 # TODO: Add test for `tensor_parallel_size` [ref: PR #3883]
 # Since we use _attn_implementation="eager" for hf_runner, here is
 # numeric difference for longer context and test can't pass
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Inconsistent image processor being used due to lack "
     "of support for dynamic image token replacement")
diff --git a/tests/multimodal/test_processor.py b/tests/multimodal/test_processor.py
index 91ceb7e81bcc..51c352361702 100644
--- a/tests/multimodal/test_processor.py
+++ b/tests/multimodal/test_processor.py
@@ -5,7 +5,6 @@
 from vllm.config import ModelConfig, VisionLanguageConfig
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.image import ImagePixelData
-from vllm.utils import is_hip
 
 from ..conftest import _STR_DTYPE_TO_TORCH_DTYPE
 
@@ -56,8 +55,6 @@ def test_clip_image_processor(hf_images, dtype):
     assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"
 
 
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Inconsistent image processor being used due to lack "
     "of support for dynamic image token replacement")
@@ -107,8 +104,6 @@ def test_llava_next_image_processor(hf_images, dtype):
     assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"
 
 
-@pytest.mark.skipif(
-    is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
 @pytest.mark.xfail(
     reason="Example image pixels were not processed using HuggingFace")
 @pytest.mark.parametrize("dtype", ["float"])
diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py
index 170c741e3e6c..99482aa93bc5 100644
--- a/vllm/worker/worker_base.py
+++ b/vllm/worker/worker_base.py
@@ -126,7 +126,13 @@ def update_environment_variables(envs: Dict[str, str]) -> None:
             # suppress the warning in `update_environment_variables`
             del os.environ[key]
             if is_hip():
-                os.environ.pop("HIP_VISIBLE_DEVICES", None)
+                hip_env_var = "HIP_VISIBLE_DEVICES"
+                if hip_env_var in os.environ:
+                    logger.warning(
+                        "Ignoring pre-set environment variable `%s=%s` as "
+                        "%s has also been set, which takes precedence.",
+                        hip_env_var, os.environ[hip_env_var], key)
+                os.environ.pop(hip_env_var, None)
         update_environment_variables(envs)
 
     def init_worker(self, *args, **kwargs):
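The worker_base.py hunk above makes the CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES precedence explicit. Below is a minimal standalone sketch of that behavior, outside the patch; the is_hip() stub and the apply_visible_devices() wrapper are illustrative assumptions, not vLLM's actual API.

# Illustrative sketch only -- mirrors the precedence logic added in the
# worker_base.py hunk; not part of the patch itself.
import logging
import os

logger = logging.getLogger(__name__)


def is_hip() -> bool:
    # Stand-in for vllm.utils.is_hip(); assume ROCm when ROCM_PATH is set.
    return "ROCM_PATH" in os.environ


def apply_visible_devices(envs: dict) -> None:
    key = "CUDA_VISIBLE_DEVICES"
    if key in envs and key in os.environ:
        # The incoming value is meant to win; drop the pre-set one.
        del os.environ[key]
        if is_hip():
            hip_env_var = "HIP_VISIBLE_DEVICES"
            if hip_env_var in os.environ:
                # Warn instead of silently discarding the user's setting.
                logger.warning(
                    "Ignoring pre-set environment variable `%s=%s` as "
                    "%s has also been set, which takes precedence.",
                    hip_env_var, os.environ[hip_env_var], key)
            os.environ.pop(hip_env_var, None)
    os.environ.update(envs)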