Skip to content

Commit

Permalink
Address reviewer comments
Browse files Browse the repository at this point in the history
Revert "Skip xfail tests on ROCm to conserve CI resources"

This reverts commit 01fa95f7862ea52b19d96c16a5e1f7752cff3577.
  • Loading branch information
mawong-amd committed Jun 25, 2024
1 parent 40c33ec commit c293b3a
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 23 deletions.
23 changes: 12 additions & 11 deletions Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ WORKDIR ${APP_MOUNT}
RUN pip install --upgrade pip
# Remove sccache so it doesn't interfere with ccache
# TODO: implement sccache support across components
RUN apt-get purge -y sccache; pip uninstall -y sccache && rm -rf "$(which sccache)"
RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
# Install torch == 2.4.0 on ROCm
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
*"rocm-5.7"*) \
Expand Down Expand Up @@ -79,16 +79,17 @@ ENV CCACHE_DIR=/root/.cache/ccache

### AMD-SMI build stage
FROM base AS build_amdsmi
# Build amdsmi wheel always
RUN cd /opt/rocm/share/amd_smi \
&& pip wheel . --wheel-dir=/install


### Flash-Attention build stage
### Flash-Attention wheel build stage
FROM base AS build_fa
ARG BUILD_FA
ARG FA_GFX_ARCHS
ARG FA_BRANCH
# Build ROCm flash-attention
# Build ROCm flash-attention wheel if `BUILD_FA = 1`
RUN --mount=type=cache,target=${CCACHE_DIR} \
if [ "$BUILD_FA" = "1" ]; then \
mkdir -p libs \
Expand All @@ -108,11 +109,11 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
fi


### Triton build stage
### Triton wheel build stage
FROM base AS build_triton
ARG BUILD_TRITON
ARG TRITON_BRANCH
# Build triton
# Build triton wheel if `BUILD_TRITON = 1`
RUN --mount=type=cache,target=${CCACHE_DIR} \
if [ "$BUILD_TRITON" = "1" ]; then \
mkdir -p libs \
Expand Down Expand Up @@ -158,38 +159,38 @@ RUN --mount=type=cache,target=${CCACHE_DIR} \
patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h rocm_patch/rocm_bf16.patch;; \
*"rocm-6.1"*) \
# Bring in upgrades to HIP graph earlier than ROCm 6.2 for vLLM
wget -N https://github.com/ROCm/vllm/raw/main/rocm_patch/libamdhip64.so.6 -P rocm_patch \
wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P rocm_patch \
&& cp rocm_patch/libamdhip64.so.6 /opt/rocm/lib/libamdhip64.so.6 \
# Prevent interference if torch bundles its own HIP runtime
&& rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so || true;; \
&& rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true;; \
*) ;; esac \
&& python3 setup.py clean --all \
&& python3 setup.py develop

# Copy amdsmi wheel(s)
# Copy amdsmi wheel into final image
RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
mkdir -p libs \
&& cp /install/*.whl libs \
# Preemptively uninstall to avoid same-version no-installs
&& pip uninstall -y amdsmi;

# Copy triton wheel(s) if any
# Copy triton wheel(s) into final image if they were built
RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
mkdir -p libs \
&& if ls /install/*.whl; then \
cp /install/*.whl libs \
# Preemptively uninstall to avoid same-version no-installs
&& pip uninstall -y triton; fi

# Copy flash-attn wheel(s) if any
# Copy flash-attn wheel(s) into final image if they were built
RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
mkdir -p libs \
&& if ls /install/*.whl; then \
cp /install/*.whl libs \
# Preemptively uninstall to avoid same-version no-installs
&& pip uninstall -y flash-attn; fi

# Install any dependencies that were built
# Install wheels that were built into the final image
RUN --mount=type=cache,target=/root/.cache/pip \
if ls libs/*.whl; then \
pip install libs/*.whl; fi
Expand Down
3 changes: 0 additions & 3 deletions tests/models/test_llava_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from transformers import AutoTokenizer

from vllm.config import VisionLanguageConfig
from vllm.utils import is_hip

from ..conftest import IMAGE_FILES

Expand Down Expand Up @@ -73,8 +72,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str],
return hf_input_ids, hf_output_str


@pytest.mark.skipif(
is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
@pytest.mark.xfail(
reason="Inconsistent image processor being used due to lack "
"of support for dynamic image token replacement")
Expand Down
4 changes: 1 addition & 3 deletions tests/models/test_phi3v.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from transformers import AutoTokenizer

from vllm.config import VisionLanguageConfig
from vllm.utils import is_cpu, is_hip
from vllm.utils import is_cpu

from ..conftest import IMAGE_FILES

Expand Down Expand Up @@ -76,8 +76,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str],
# TODO: Add test for `tensor_parallel_size` [ref: PR #3883]
# Since we use _attn_implementation="eager" for hf_runner, here is
# numeric difference for longer context and test can't pass
@pytest.mark.skipif(
is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
@pytest.mark.xfail(
reason="Inconsistent image processor being used due to lack "
"of support for dynamic image token replacement")
Expand Down
5 changes: 0 additions & 5 deletions tests/multimodal/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from vllm.config import ModelConfig, VisionLanguageConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.image import ImagePixelData
from vllm.utils import is_hip

from ..conftest import _STR_DTYPE_TO_TORCH_DTYPE

Expand Down Expand Up @@ -56,8 +55,6 @@ def test_clip_image_processor(hf_images, dtype):
assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"


@pytest.mark.skipif(
is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
@pytest.mark.xfail(
reason="Inconsistent image processor being used due to lack "
"of support for dynamic image token replacement")
Expand Down Expand Up @@ -107,8 +104,6 @@ def test_llava_next_image_processor(hf_images, dtype):
assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"


@pytest.mark.skipif(
is_hip(), reason="ROCm is skipping xfail tests to conserve CI resources")
@pytest.mark.xfail(
reason="Example image pixels were not processed using HuggingFace")
@pytest.mark.parametrize("dtype", ["float"])
Expand Down
8 changes: 7 additions & 1 deletion vllm/worker/worker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,13 @@ def update_environment_variables(envs: Dict[str, str]) -> None:
# suppress the warning in `update_environment_variables`
del os.environ[key]
if is_hip():
os.environ.pop("HIP_VISIBLE_DEVICES", None)
hip_env_var = "HIP_VISIBLE_DEVICES"
if hip_env_var in os.environ:
logger.warning(
"Ignoring pre-set environment variable `%s=%s` as "
"%s has also been set, which takes precedence.",
hip_env_var, os.environ[hip_env_var], key)
os.environ.pop(hip_env_var, None)
update_environment_variables(envs)

def init_worker(self, *args, **kwargs):
Expand Down

0 comments on commit c293b3a

Please sign in to comment.