Skip to content

Commit

Permalink
Merge branch 'main' into libsodium-flags
Browse files Browse the repository at this point in the history
  • Loading branch information
RH-steve-grubb authored Sep 6, 2024
2 parents fa8c8c2 + 341a6b5 commit f5b7004
Show file tree
Hide file tree
Showing 2 changed files with 293 additions and 1 deletion.
291 changes: 291 additions & 0 deletions Dockerfile.rocm.ubi
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
## Global Args #################################################################
# Tag of the ubi9/ubi-minimal image used by the first FROM below.
ARG BASE_UBI_IMAGE_TAG="9.4"
# Python minor version; re-declared in the python-install stage, which installs
# the matching python${PYTHON_VERSION}-* packages.
ARG PYTHON_VERSION="3.11"
# Default ROCm ARCHes to build vLLM for.
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"

## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS rocm-base

# Max jobs for parallel build
ARG MAX_JOBS=12

USER root

# BUILD_TARGET marks this as the ROCm flavour of the image; ROCM_VERSION selects
# the repository paths written to the repo file below.
ENV BUILD_TARGET='rocm' \
    ROCM_VERSION=6.1.2

# Set up ROCm repository and install necessary packages.
# A single printf writes both repository sections ([amdgpu] and
# [ROCm-<version>]) to /etc/yum.repos.d/amdgpu.repo, one argument per line.
RUN printf '%s\n' \
        "[amdgpu]" \
        "name=amdgpu" \
        "baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.4/main/x86_64/" \
        "enabled=1" \
        "priority=50" \
        "gpgcheck=1" \
        "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" \
        "[ROCm-${ROCM_VERSION}]" \
        "name=ROCm${ROCM_VERSION}" \
        "baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main" \
        "enabled=1" \
        "priority=50" \
        "gpgcheck=1" \
        "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" \
        > /etc/yum.repos.d/amdgpu.repo

# Update the base packages, install the ROCm stack plus basic tooling, and
# drop the package-manager cache in the same layer.
RUN microdnf -y update \
    && microdnf -y install \
        rocm \
        hipcc \
        git \
        which \
    && microdnf clean all

WORKDIR /workspace

##################################################################################################

FROM rocm-base AS python-install
# Re-declared without a value so the global default (or --build-arg) applies here.
ARG PYTHON_VERSION

# Location of the virtual environment; its bin/ directory is placed first on PATH.
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install the selected Python (headers, pip, wheel), create the virtual
# environment with access to system site-packages, and upgrade pip/wheel in it.
RUN microdnf install -y --nodocs --setopt=install_weak_deps=0 \
        python${PYTHON_VERSION}-devel \
        python${PYTHON_VERSION}-pip \
        python${PYTHON_VERSION}-wheel \
    && python${PYTHON_VERSION} -m venv ${VIRTUAL_ENV} --system-site-packages \
    && ${VIRTUAL_ENV}/bin/pip install --no-cache -U pip wheel \
    && microdnf clean all

##################################################################################################

FROM python-install AS python-rocm-base

# install common dependencies
# Both requirement files are bind-mounted read-only from the build context for
# the duration of this step; /root/.cache/pip is a BuildKit cache mount.
RUN --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt,readonly \
    --mount=type=bind,source=requirements-rocm.txt,target=requirements-rocm.txt,readonly \
    --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-rocm.txt

##################################################################################################

FROM python-rocm-base AS base

# Set the application mount point
ARG APP_MOUNT=/vllm-workspace
WORKDIR ${APP_MOUNT}

# Upgrade pip and remove sccache if it is present.
# The two removal commands are best-effort; each "|| true" is grouped so that
# it applies only to its own command and no longer hides a failed pip upgrade.
RUN python3 -m pip install --upgrade --no-cache-dir pip && \
    { microdnf -y remove sccache || true; } && \
    { python3 -m pip uninstall -y sccache || true; } && \
    rm -f "$(which sccache)" && \
    microdnf clean all && \
    rm -rf /var/cache/yum /var/cache/dnf

# Install the pinned torch 2.5.0 / torchvision 0.20.0 nightly builds when a
# ROCm 6.1 directory is found under /opt; other ROCm versions are left as-is.
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
        *"rocm-6.1"*) \
            python3 -m pip uninstall -y torch torchvision \
            && python3 -m pip install --no-cache-dir --pre \
                torch==2.5.0.dev20240726 \
                torchvision==0.20.0.dev20240726 \
                --index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \
        *) ;; esac

# Set environment variables
ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
ENV PATH=$PATH:/opt/rocm/bin:/libtorch/bin
# ${VAR:+${VAR}:} keeps any existing value as a prefix but avoids a leading
# empty entry (which means "current directory") when the variable is unset.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/rocm/lib:/libtorch/lib
ENV CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH:+${CPLUS_INCLUDE_PATH}:}/libtorch/include:/libtorch/include/torch/csrc/api/include:/opt/rocm/include
# Take the target GPU architectures from the global build argument instead of
# hard-coding them, so that --build-arg PYTORCH_ROCM_ARCH=... is honoured.
# Declared without a value to inherit the default set before the first FROM.
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
ENV CCACHE_DIR=/root/.cache/ccache

##################################################################################################

FROM base AS build_base

# Build tooling for the stages derived from this one.
# The cmake requirement must be quoted: unquoted, the shell parses ">=3.26" as
# an output redirection to a file named "=3.26" and pip only sees "cmake".
RUN python3 -m pip install --upgrade --no-cache-dir ninja "cmake>=3.26"

##################################################################################################

##################################################################################################

### AMD-SMI build stage
FROM build_base AS build_amdsmi

# Build AMD SMI wheel
# The wheel is built from /opt/rocm/share/amd_smi and written to /install,
# which the vllm-openai stage bind-mounts later.
WORKDIR /opt/rocm/share/amd_smi
RUN python3 -m pip wheel . --wheel-dir=/install

##################################################################################################

### Flash-Attention wheel build stage
FROM build_base AS build_fa

# Whether to install CK-based flash-attention
ARG BUILD_FA="1"
# Whether to try the prebuilt wheel at FA_WHEEL_URL before building from source
ARG TRY_FA_WHEEL="1"
ARG FA_WHEEL_URL="https://github.com/ROCm/flash-attention/releases/download/v2.5.9post1-cktile-vllm/flash_attn-2.5.9.post1-cp39-cp39-linux_x86_64.whl"
# GPU architectures passed to the source build as GPU_ARCHS
ARG FA_GFX_ARCHS="gfx90a;gfx942"
# Git revision of ROCm/flash-attention that the source build checks out
ARG FA_BRANCH="23a2b1c2"

# Ensure necessary tools are installed
RUN microdnf install -y wget git \
    && microdnf clean all

# Build ROCm flash-attention wheel if `BUILD_FA` is set to 1.
# /install is always created because later build stages expect it to exist,
# even when it stays empty. With BUILD_FA=1, a trial `pip install` of
# FA_WHEEL_URL decides between downloading that wheel into /install and
# building the wheel from source at FA_BRANCH.
RUN --mount=type=cache,target=${CCACHE_DIR} \
    mkdir -p /install && \
    if [ "${BUILD_FA}" = "1" ]; then \
        if [ "${TRY_FA_WHEEL}" = "1" ] && python3 -m pip install "${FA_WHEEL_URL}"; then \
            wget -N "${FA_WHEEL_URL}" -P /install; \
        else \
            mkdir -p /libs && \
            cd /libs && \
            git clone https://github.com/ROCm/flash-attention.git && \
            cd flash-attention && \
            git checkout "${FA_BRANCH}" && \
            git submodule update --init && \
            GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \
        fi; \
    fi

##################################################################################################

### Triton wheel build stage
FROM build_base AS build_triton

# Whether to build triton on rocm
ARG BUILD_TRITON="1"
# Git revision of the triton repository to build
ARG TRITON_BRANCH="e0fc12c"

# Build triton wheel if `BUILD_TRITON` is set to 1.
# /install is always created because later build stages expect it to exist,
# even when no wheel is built.
RUN --mount=type=cache,target=${CCACHE_DIR} \
    mkdir -p /install && \
    if [ "${BUILD_TRITON}" = "1" ]; then \
        mkdir -p /libs && \
        git clone https://github.com/OpenAI/triton.git /libs/triton && \
        git -C /libs/triton checkout "${TRITON_BRANCH}" && \
        cd /libs/triton/python && \
        python3 setup.py bdist_wheel --dist-dir=/install; \
    fi

##################################################################################################

### Final vLLM build stage
FROM build_base AS final

# Import the vLLM development directory from the build context
COPY . .

# wget is used below to download the ROCm 6.1 libamdhip64 patch
RUN microdnf -y install wget && microdnf clean all

# Package upgrades to avoid dependency issues and add functionality.
# The extras specifier is quoted so the shell cannot treat "[cli]" as a glob.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade numba scipy "huggingface-hub[cli]" && \
    microdnf clean all

# NOTE: the base stage also sets PYTORCH_ROCM_ARCH with ENV, and an ENV value
# takes precedence over an ARG of the same name, so this ARG does not change
# what the build commands below see.
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"

# Set environment variables for runtime
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
# Silences the HF Tokenizers warning
ENV TOKENIZERS_PARALLELISM=false

# Install dependencies from requirements file and apply ROCm specific patches,
# then build the vLLM wheel into ./dist.
# - The detected ROCm directory name is kept in `rocm_dir` rather than
#   ROCM_VERSION, so the exported ENV ROCM_VERSION from the base layer is not
#   overwritten for the setup.py invocations that follow.
# - For ROCm 6.1 a patched libamdhip64.so.6 is downloaded into /opt/rocm/lib
#   and any libamdhip64.so* inside torch's lib directory is removed to avoid a
#   conflicting HIP runtime.
# - NOTE(review): the trailing "|| true" also covers the wget, so a failed
#   download does not fail the build - confirm this is intended.
RUN --mount=type=cache,target=${CCACHE_DIR} \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -Ur requirements-rocm.txt && \
    rocm_dir=$(ls /opt | grep -Po 'rocm-[0-9]+\.[0-9]+') && \
    case "$rocm_dir" in \
        "rocm-6.1") \
            wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib && \
            rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true ;; \
        *) \
            echo "ROCm version $rocm_dir is not supported for patching." ;; \
    esac && \
    python3 setup.py clean --all && \
    python3 setup.py bdist_wheel --dist-dir=dist

##################################################################################################

FROM base AS vllm-openai

WORKDIR /workspace

# Set up the virtual environment and update PATH
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=${VIRTUAL_ENV}/bin:${PATH}

# Install necessary build tools (gcc only, without weak dependencies or docs)
RUN microdnf install -y --nodocs --setopt=install_weak_deps=0 gcc \
    && microdnf clean all

# Each step below bind-mounts the output directory of one build stage, copies
# its wheel(s) into ./libs, and preemptively uninstalls the package so that the
# final install is not skipped as a same-version no-op.

# Copy amdsmi wheel into final image
RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
    mkdir -p libs && \
    cp /install/*.whl libs && \
    python3 -m pip uninstall -y amdsmi

# Copy triton wheel(s) into final image if they were built
RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
    mkdir -p libs && \
    if ls /install/*.whl; then \
        cp /install/*.whl libs && \
        python3 -m pip uninstall -y triton; \
    fi

# Copy flash-attn wheel(s) into final image if they were built
RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
    mkdir -p libs && \
    if ls /install/*.whl; then \
        cp /install/*.whl libs && \
        python3 -m pip uninstall -y flash-attn; \
    fi

# Copy vLLM wheel(s) into the final image
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,from=final,src=/vllm-workspace/dist,target=/dist \
    cp /dist/*.whl libs && \
    python3 -m pip uninstall -y vllm

# Install wheels that were built to the final image
RUN --mount=type=cache,target=/root/.cache/pip \
    if ls libs/*.whl; then \
        python3 -m pip install libs/*.whl; \
    fi

# Environment variables for runtime configuration.
# HOME is set before the user-creation step because that step uses $HOME.
ENV HF_HUB_OFFLINE=1 \
    PORT=8000 \
    HOME=/home/vllm \
    VLLM_USAGE_SOURCE=production-docker-image

# Set up a non-root user for OpenShift: uid 2000 in group 0, with group
# read/write/execute on the directories listed in the chmod.
RUN umask 002 \
    && useradd --uid 2000 --gid 0 vllm \
    && mkdir -p /licenses \
    && chmod g+rwx $HOME /usr/src /workspace

COPY LICENSE /licenses/vllm.md

# Remaining vLLM runtime settings.
# VLLM_ALLOW_LONG_MAX_MODEL_LEN allows the requested max length to exceed what
# is extracted from the config.json,
# see: https://github.com/vllm-project/vllm/pull/7080
ENV VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1

# Switch to the non-root user
USER 2000

# Set the entrypoint
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
3 changes: 2 additions & 1 deletion Dockerfile.ubi
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
ARG PYTHON_VERSION

RUN microdnf install -y \
RUN microdnf -y update && \
microdnf install -y \
python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
&& microdnf clean all

Expand Down

0 comments on commit f5b7004

Please sign in to comment.