Add Docker images for the xpu (Intel) and hpu (Habana Gaudi) architectures and
build them in the publish workflow; move the CUDA image to CUDA 12.1; replace
Miniconda with the system python3.10 in the ROCm image; relocate the Docker
data-root on the CI runner after freeing build space.

Review notes:
  * pip uninstall in Dockerfile-xpu now passes -y (docker build has no tty).
  * curl downloads now pass -f so an HTTP error aborts the build step instead
    of saving the error page as a key / apt list / installer / .deb.
  * The workflow uses apt-get (not apt) and pins maximize-build-space to a
    release tag instead of the master branch.
  * Line structure was rebuilt from a whitespace-collapsed copy of the patch:
    YAML indentation and whitespace-only changes are best-effort, and the
    blob hashes on the index lines predate the review fixes above.

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 0b801a4c4..2aa0e3441 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -3,7 +3,7 @@ name: Publish Docker image
 on:
   # Allow manual runs
   workflow_dispatch:
-  
+
   # Only run for push on the main branch or for tagged version
   push:
     branches:
@@ -15,13 +15,10 @@ env:
   REGISTRY: ghcr.io
   IMAGE_NAME: ${{ github.repository }}
 
-
 permissions:
   packages: write
 
-# define build arguments
 
-
 jobs:
   build-image:
     strategy:
@@ -30,14 +27,24 @@ jobs:
         include:
           - arch: cuda
           - arch: rocm
+          - arch: xpu
+          - arch: hpu
 
-    runs-on: ubuntu-latest 
+    runs-on: ubuntu-latest
 
     permissions:
       contents: read
       packages: write
 
     steps:
+      - uses: easimon/maximize-build-space@v10
+        with:
+          remove-dotnet: 'true'
+          remove-codeql: 'true'
+          remove-haskell: 'true'
+          remove-android: 'true'
+          build-mount-path: /home/runner/work/milabench/
+
       - name: Show all images
         run: |
           docker image ls
@@ -48,10 +55,19 @@ jobs:
           # The images are still on github registry
           docker image prune -f -a --filter "until=336h"
           docker system prune -f
+          sudo apt-get install -y jq
+          jq '. + { "data-root": "/home/runner/work/milabench/docker" }' /etc/docker/daemon.json > newconfig.json
+          sudo mv -f newconfig.json /etc/docker/daemon.json
+          cat /etc/docker/daemon.json
+          sudo systemctl stop docker.service
+          sudo systemctl stop docker.socket
+          sudo systemctl start docker.socket
+          sudo systemctl start docker.service
+          docker info
 
       - name: Check out the repo
         uses: actions/checkout@v3
-      
+
       - name: Get Image Tag Name
         env:
           GITHUB_REF_NAME_ENV: ${{ github.ref_name }}
@@ -62,14 +78,14 @@ jobs:
             IMAGE_TAG="${GITHUB_REF_NAME##*/}"
           fi
           echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV
-      
+
       - name: Log in to the registry
         uses: docker/login-action@v2
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-      
+
       - name: Extract metadata (tags, labels) for the image
         id: meta
         uses: docker/metadata-action@v4
diff --git a/docker/Dockerfile-cuda b/docker/Dockerfile-cuda
index 6e7641844..da9c2c96b 100644
--- a/docker/Dockerfile-cuda
+++ b/docker/Dockerfile-cuda
@@ -1,7 +1,7 @@
 # FROM ubuntu:22.04
 
 # For cuda-gdb
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
+FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
 
 # Arguments
 # ---------
@@ -15,6 +15,10 @@ ENV MILABENCH_GPU_ARCH=$ARCH
 ENV MILABENCH_CONFIG_NAME=$CONFIG
 ENV MILABENCH_DOCKER=1
 
+ENV CUDA_VER=12.1
+ENV MELLANOX_KEY="https://content.mellanox.com/ofed/RPM-GPG-KEY-Mellanox"
+ENV MELLANOX_LIST="https://linux.mellanox.com/public/repo/mlnx_ofed/${MOFED_VERSION}/ubuntu22.04/mellanox_mlnx_ofed.list"
+
 # Paths
 # -----
 
@@ -41,18 +45,18 @@ COPY . /milabench/milabench/
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update -y &&\
-    apt-get install -y --no-install-recommends git build-essential curl python3 python-is-python3 python3-pip &&\
-    curl -o /etc/apt/trusted.gpg.d/mellanox.asc https://content.mellanox.com/ofed/RPM-GPG-KEY-Mellanox &&\
-    curl -o /etc/apt/sources.list.d/mellanox.list https://linux.mellanox.com/public/repo/mlnx_ofed/${MOFED_VERSION}/ubuntu22.04/mellanox_mlnx_ofed.list &&\
+    apt-get install -y --no-install-recommends git build-essential curl python3.10 python-is-python3 python3-pip &&\
+    curl -f -o /etc/apt/trusted.gpg.d/mellanox.asc "$MELLANOX_KEY" &&\
+    curl -f -o /etc/apt/sources.list.d/mellanox.list "$MELLANOX_LIST" &&\
     apt-get update -y &&\
     apt-get install -y --no-install-recommends libibverbs1 &&\
     apt-get clean &&\
-    rm -rf /var/lib/apt/lists/* 
+    rm -rf /var/lib/apt/lists/*
 
 # Install Rust
 RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
 ENV PATH="/root/.cargo/bin:${PATH}"
-ENV CUDA_HOME=/usr/local/cuda-11.8
+ENV CUDA_HOME="/usr/local/cuda-${CUDA_VER}"
 
 # Install Milabench
 # -----------------
@@ -71,6 +75,6 @@ RUN python -m pip install -U pip &&\
 ENV PIP_DEFAULT_TIMEOUT=800
 
 RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\
-    python -m pip cache purge 
+    python -m pip cache purge
 
 CMD milabench run
diff --git a/docker/Dockerfile-hpu b/docker/Dockerfile-hpu
new file mode 100644
index 000000000..90fac3849
--- /dev/null
+++ b/docker/Dockerfile-hpu
@@ -0,0 +1,80 @@
+
+FROM ubuntu:22.04
+
+# Arguments
+# ---------
+
+ARG ARCH=hpu
+ENV MILABENCH_GPU_ARCH=$ARCH
+
+ARG CONFIG=standard.yaml
+ENV MILABENCH_CONFIG_NAME=$CONFIG
+ENV MILABENCH_DOCKER=1
+
+ARG PYTHON="3.10"
+
+ENV HABANA_INSTALLER=https://vault.habana.ai/artifactory/gaudi-installer/1.16.1/habanalabs-installer.sh
+
+# Paths
+# -----
+
+ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME
+ENV MILABENCH_BASE=/milabench/envs
+ENV MILABENCH_ARGS=""
+ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs"
+ENV BENCHMARK_VENV="$MILABENCH_BASE/venv"
+
+
+# Copy milabench
+# --------------
+
+WORKDIR /milabench
+COPY . /milabench/milabench/
+
+
+# Install Dependencies
+# --------------------
+
+# curl: used to download the Habana installer and rustup
+# git: used by milabench
+# rustc: used by BERT models inside https://pypi.org/project/tokenizers/
+# build-essential: for rust
+
+RUN apt-get update &&\
+    apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\
+    apt-get clean &&\
+    rm -rf /var/lib/apt/lists/* &&\
+    curl -fsSL -o habana_installer.sh "${HABANA_INSTALLER}" &&\
+    chmod +x habana_installer.sh
+
+RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+# Install Milabench
+# -----------------
+
+# Have to install habana in the system env too...
+# so we can monitor the HPU..
+RUN python -m pip install -U pip &&\
+    python -m pip install -U setuptools &&\
+    python -m pip install -U poetry &&\
+    python -m pip install -e /milabench/milabench/ &&\
+    ./habana_installer.sh install -t dependencies &&\
+    ./habana_installer.sh install -t pytorch &&\
+    python -m pip cache purge
+
+# Prepare bench
+# -------------
+
+# pip times out often when downloading pytorch
+ENV PIP_DEFAULT_TIMEOUT=800
+ENV HABANALABS_VIRTUAL_DIR=$BENCHMARK_VENV/torch
+
+# Install habana in the benchmark environment
+RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\
+    ./habana_installer.sh install -t dependencies --venv -y &&\
+    ./habana_installer.sh install -t pytorch --venv -y &&\
+    python -m pip cache purge &&\
+    rm -rf habana_installer.sh
+
+CMD ["milabench", "run"]
diff --git a/docker/Dockerfile-rocm b/docker/Dockerfile-rocm
index 504660848..2290c40ef 100644
--- a/docker/Dockerfile-rocm
+++ b/docker/Dockerfile-rocm
@@ -1,3 +1,4 @@
+
 FROM ubuntu:22.04
 
 # Arguments
@@ -10,16 +11,17 @@ ARG CONFIG=standard.yaml
 ENV MILABENCH_CONFIG_NAME=$CONFIG
 ENV MILABENCH_DOCKER=1
 
+ARG PYTHON="3.10"
+
 # Paths
 # -----
 
 ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME
 ENV MILABENCH_BASE=/milabench/envs
-ENV MILABENCH_OUTPUT=/milabench/results/
 ENV MILABENCH_ARGS=""
 
-ENV CONDA_PATH=/opt/anaconda
-
+ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs"
+ENV BENCHMARK_VENV="$MILABENCH_BASE/venv"
 
 # Copy milabench
 # --------------
@@ -37,22 +39,13 @@ COPY . /milabench/milabench/
 # build-essential: for rust
 
 RUN apt-get update &&\
-    apt-get install -y git build-essential curl &&\
+    apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\
     apt-get clean &&\
     rm -rf /var/lib/apt/lists/*
 
 RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
 ENV PATH="/root/.cargo/bin:${PATH}"
 
-# Install Python
-# --------------
-
-# Install anaconda because milabench will need it later anyway
-RUN curl https://repo.anaconda.com/miniconda/Miniconda3-py39_23.1.0-1-Linux-x86_64.sh -o ~/miniconda.sh && \
-    /bin/bash ~/miniconda.sh -b -p $CONDA_PATH && rm ~/miniconda.sh
-ENV PATH=$CONDA_PATH/bin:$PATH
-
-
 # Install Milabench
 # -----------------
 
diff --git a/docker/Dockerfile-xpu b/docker/Dockerfile-xpu
new file mode 100644
index 000000000..d8e524925
--- /dev/null
+++ b/docker/Dockerfile-xpu
@@ -0,0 +1,79 @@
+
+
+FROM ubuntu:22.04
+
+# Arguments
+# ---------
+
+ARG ARCH=xpu
+ENV MILABENCH_GPU_ARCH=$ARCH
+
+ARG CONFIG=standard.yaml
+ENV MILABENCH_CONFIG_NAME=$CONFIG
+ENV MILABENCH_DOCKER=1
+
+ARG PYTHON="3.10"
+
+ENV XPU_MANAGER="V1.2.36/xpumanager_1.2.36_20240428.081009.377f9162.u22.04_amd64.deb"
+
+# Paths
+# -----
+
+ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME
+ENV MILABENCH_BASE=/milabench/base
+ENV MILABENCH_ARGS=""
+
+ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs"
+ENV BENCHMARK_VENV="$MILABENCH_BASE/venv"
+
+# Copy milabench
+# --------------
+
+WORKDIR /milabench
+COPY . /milabench/milabench/
+
+
+# Install Dependencies
+# --------------------
+
+# curl: used to download the XPU manager package and rustup
+# git: used by milabench
+# rustc: used by BERT models inside https://pypi.org/project/tokenizers/
+# build-essential: for rust
+
+RUN apt-get update &&\
+    apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\
+    apt-get clean &&\
+    rm -rf /var/lib/apt/lists/* &&\
+    curl -fsSL -o xpu_manager.deb "https://github.com/intel/xpumanager/releases/download/${XPU_MANAGER}" &&\
+    dpkg -i xpu_manager.deb &&\
+    rm -rf xpu_manager.deb
+
+
+RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+# Install Milabench
+# -----------------
+
+RUN python -m pip install -U pip &&\
+    python -m pip install -U setuptools &&\
+    python -m pip install -U poetry &&\
+    python -m pip install -e /milabench/milabench/ &&\
+    python -m pip cache purge
+
+
+# Prepare bench
+# -------------
+
+# pip times out often when downloading pytorch
+ENV PIP_DEFAULT_TIMEOUT=800
+
+# Uninstall default pytorch (-y: docker build cannot answer the confirmation prompt)
+# reinstall pytorch with the right extensions...
+RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\
+    /bin/bash -c "source $BENCHMARK_VENV/torch/bin/activate && pip uninstall -y torch torchvision torchaudio" &&\
+    /bin/bash -c "source $BENCHMARK_VENV/torch/bin/activate && pip install torch torchvision torchaudio intel-extension-for-pytorch oneccl_bind_pt intel-extension-for-pytorch-deepspeed --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" &&\
+    python -m pip cache purge
+
+CMD ["milabench", "run"]