Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create Dockerfile-xpu & Dockerfile-hpu #247

Draft
wants to merge 26 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Publish Docker image
on:
# Allow manual runs
workflow_dispatch:

# Only run on pushes to the main branch or on tagged versions
push:
branches:
Expand All @@ -15,13 +15,10 @@ env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}


permissions:
packages: write


# define build arguments

jobs:
build-image:
strategy:
Expand All @@ -30,14 +27,24 @@ jobs:
include:
- arch: cuda
- arch: rocm
- arch: xpu
- arch: hpu

runs-on: ubuntu-latest
runs-on: ubuntu-latest

permissions:
contents: read
packages: write

steps:
- uses: easimon/maximize-build-space@master
with:
remove-dotnet: 'true'
remove-codeql: 'true'
remove-haskell: 'true'
remove-android: 'true'
build-mount-path: /home/runner/work/milabench/

- name: Show all images
run: |
docker image ls
Expand All @@ -48,10 +55,19 @@ jobs:
# The images are still on github registry
docker image prune -f -a --filter "until=336h"
docker system prune -f
sudo apt install jq -y
jq '. + { "data-root": "/home/runner/work/milabench/docker" }' /etc/docker/daemon.json > newconfig.json
sudo mv -f newconfig.json /etc/docker/daemon.json
cat /etc/docker/daemon.json
sudo systemctl stop docker.service
sudo systemctl stop docker.socket
sudo systemctl start docker.socket
sudo systemctl start docker.service
docker info

- name: Check out the repo
uses: actions/checkout@v3

- name: Get Image Tag Name
env:
GITHUB_REF_NAME_ENV: ${{ github.ref_name }}
Expand All @@ -62,14 +78,14 @@ jobs:
IMAGE_TAG="${GITHUB_REF_NAME##*/}"
fi
echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV

- name: Log in to the registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for the image
id: meta
uses: docker/metadata-action@v4
Expand Down
18 changes: 11 additions & 7 deletions docker/Dockerfile-cuda
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# FROM ubuntu:22.04

# For cuda-gdb
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04

# Arguments
# ---------
Expand All @@ -15,6 +15,10 @@ ENV MILABENCH_GPU_ARCH=$ARCH
ENV MILABENCH_CONFIG_NAME=$CONFIG
ENV MILABENCH_DOCKER=1

ENV CUDA_VER=12.1
ENV MELLANOX_KEY="https://content.mellanox.com/ofed/RPM-GPG-KEY-Mellanox"
ENV MELLANOX_LIST="https://linux.mellanox.com/public/repo/mlnx_ofed/${MOFED_VERSION}/ubuntu22.04/mellanox_mlnx_ofed.list"

# Paths
# -----

Expand All @@ -41,18 +45,18 @@ COPY . /milabench/milabench/

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -y &&\
apt-get install -y --no-install-recommends git build-essential curl python3 python-is-python3 python3-pip &&\
curl -o /etc/apt/trusted.gpg.d/mellanox.asc https://content.mellanox.com/ofed/RPM-GPG-KEY-Mellanox &&\
curl -o /etc/apt/sources.list.d/mellanox.list https://linux.mellanox.com/public/repo/mlnx_ofed/${MOFED_VERSION}/ubuntu22.04/mellanox_mlnx_ofed.list &&\
apt-get install -y --no-install-recommends git build-essential curl python3.10 python-is-python3 python3-pip &&\
curl -o /etc/apt/trusted.gpg.d/mellanox.asc $MELLANOX_KEY &&\
curl -o /etc/apt/sources.list.d/mellanox.list $MELLANOX_LIST &&\
apt-get update -y &&\
apt-get install -y --no-install-recommends libibverbs1 &&\
apt-get clean &&\
rm -rf /var/lib/apt/lists/*
rm -rf /var/lib/apt/lists/*

# Install Rust
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
ENV CUDA_HOME=/usr/local/cuda-11.8
ENV CUDA_HOME="/usr/local/cuda-${CUDA_VER}"

# Install Milabench
# -----------------
Expand All @@ -71,6 +75,6 @@ RUN python -m pip install -U pip &&\
ENV PIP_DEFAULT_TIMEOUT=800

RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\
python -m pip cache purge
python -m pip cache purge

CMD milabench run
80 changes: 80 additions & 0 deletions docker/Dockerfile-hpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

FROM ubuntu:22.04

# Arguments
# ---------

ARG ARCH=hpu
ENV MILABENCH_GPU_ARCH=$ARCH

ARG CONFIG=standard.yaml
ENV MILABENCH_CONFIG_NAME=$CONFIG
ENV MILABENCH_DOCKER=1

ARG PYTHON="3.10"

ENV HABANA_INSTALLER=https://vault.habana.ai/artifactory/gaudi-installer/1.16.1/habanalabs-installer.sh

# Paths
# -----

ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME
ENV MILABENCH_BASE=/milabench/envs
ENV MILABENCH_ARGS=""
ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs"
ENV BENCHMARK_VENV="$MILABENCH_BASE/venv"


# Copy milabench
# --------------

WORKDIR /milabench
COPY . /milabench/milabench/


# Install Dependencies
# --------------------

# curl: used to download the Habana installer and rustup
# git: used by milabench
# rustc: used by BERT models inside https://pypi.org/project/tokenizers/
# build-essential: for rust

# Install the system packages, then fetch the Habana installer script.
# curl flags: -f fails the build on an HTTP error instead of saving the error
# page as the installer, -sS stays quiet but still prints errors, and
# -L follows redirects.
RUN apt-get update &&\
    apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\
    apt-get clean &&\
    rm -rf /var/lib/apt/lists/* &&\
    curl -fsSL -o habana_installer.sh "${HABANA_INSTALLER}" &&\
    chmod +x habana_installer.sh

RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

# Install Milabench
# -----------------

# Habana also has to be installed in the system environment (not only in the
# benchmark venv) so that the HPU can be monitored.
# `-y` keeps the installer non-interactive, matching the venv install further
# down in this file: any confirmation prompt cannot be answered during
# `docker build`.
RUN python -m pip install -U pip &&\
    python -m pip install -U setuptools &&\
    python -m pip install -U poetry &&\
    python -m pip install -e /milabench/milabench/ &&\
    ./habana_installer.sh install -t dependencies -y &&\
    ./habana_installer.sh install -t pytorch -y &&\
    python -m pip cache purge

# Prepare bench
# -------------

# pip times out often when downloading pytorch
ENV PIP_DEFAULT_TIMEOUT=800
ENV HABANALABS_VIRTUAL_DIR=$BENCHMARK_VENV/torch

# Install habana in the benchmark environment
# Steps, in order: install the benchmarks with `milabench install`, run the
# Habana installer again with `--venv -y` for both the dependencies and the
# pytorch targets, purge the pip cache, and delete the installer script.
# NOTE(review): `--venv` presumably targets HABANALABS_VIRTUAL_DIR
# ($BENCHMARK_VENV/torch, set just above) - confirm against the installer docs.
RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\
./habana_installer.sh install -t dependencies --venv -y &&\
./habana_installer.sh install -t pytorch --venv -y &&\
python -m pip cache purge &&\
rm -rf habana_installer.sh

CMD ["milabench", "run"]
19 changes: 6 additions & 13 deletions docker/Dockerfile-rocm
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

FROM ubuntu:22.04

# Arguments
Expand All @@ -10,16 +11,17 @@ ARG CONFIG=standard.yaml
ENV MILABENCH_CONFIG_NAME=$CONFIG
ENV MILABENCH_DOCKER=1

ARG PYTHON="3.10"


# Paths
# -----

ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME
ENV MILABENCH_BASE=/milabench/envs
ENV MILABENCH_OUTPUT=/milabench/results/
ENV MILABENCH_ARGS=""
ENV CONDA_PATH=/opt/anaconda

ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs"
ENV BENCHMARK_VENV="$MILABENCH_BASE/venv"

# Copy milabench
# --------------
Expand All @@ -37,22 +39,13 @@ COPY . /milabench/milabench/
# build-essential: for rust

RUN apt-get update &&\
apt-get install -y git build-essential curl &&\
apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\
apt-get clean &&\
rm -rf /var/lib/apt/lists/*

RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

# Install Python
# --------------

# Install anaconda because milabench will need it later anyway
RUN curl https://repo.anaconda.com/miniconda/Miniconda3-py39_23.1.0-1-Linux-x86_64.sh -o ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p $CONDA_PATH && rm ~/miniconda.sh
ENV PATH=$CONDA_PATH/bin:$PATH


# Install Milabench
# -----------------

Expand Down
79 changes: 79 additions & 0 deletions docker/Dockerfile-xpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@


FROM ubuntu:22.04

# Arguments
# ---------

ARG ARCH=xpu
ENV MILABENCH_GPU_ARCH=$ARCH

ARG CONFIG=standard.yaml
ENV MILABENCH_CONFIG_NAME=$CONFIG
ENV MILABENCH_DOCKER=1

ARG PYTHON="3.10"

ENV XPU_MANAGER="V1.2.36/xpumanager_1.2.36_20240428.081009.377f9162.u22.04_amd64.deb"

# Paths
# -----

ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME
ENV MILABENCH_BASE=/milabench/base
ENV MILABENCH_ARGS=""

ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs"
ENV BENCHMARK_VENV="$MILABENCH_BASE/venv"

# Copy milabench
# --------------

WORKDIR /milabench
COPY . /milabench/milabench/


# Install Dependencies
# --------------------

# curl: used to download the XPU manager package and rustup
# git: used by milabench
# rustc: used by BERT models inside https://pypi.org/project/tokenizers/
# build-essential: for rust

# Install the system packages, then download and install the XPU manager .deb.
# curl flags: -f fails the build on an HTTP error instead of handing an error
# page to dpkg, -sS stays quiet but still prints errors, and -L follows the
# redirect issued for the release download.
RUN apt-get update &&\
    apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\
    apt-get clean &&\
    rm -rf /var/lib/apt/lists/* &&\
    curl -fsSL -o xpu_manager.deb "https://github.com/intel/xpumanager/releases/download/${XPU_MANAGER}" &&\
    dpkg -i xpu_manager.deb &&\
    rm -rf xpu_manager.deb


RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

# Install Milabench
# -----------------

RUN python -m pip install -U pip &&\
python -m pip install -U setuptools &&\
python -m pip install -U poetry &&\
python -m pip install -e /milabench/milabench/ &&\
python -m pip cache purge


# Prepare bench
# -------------

# pip times out often when downloading pytorch
ENV PIP_DEFAULT_TIMEOUT=800

# Install the benchmarks, then swap the default PyTorch for the Intel XPU build:
# uninstall the stock torch packages from the benchmark venv and reinstall them,
# together with the Intel extensions, from the Intel wheel index.
# `-y` is required on `pip uninstall`: without it pip asks for confirmation,
# and there is no stdin to answer the prompt during `docker build`.
RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\
    /bin/bash -c "source $BENCHMARK_VENV/torch/bin/activate && pip uninstall -y torch torchvision torchaudio" &&\
    /bin/bash -c "source $BENCHMARK_VENV/torch/bin/activate && pip install torch torchvision torchaudio intel-extension-for-pytorch oneccl_bind_pt intel-extension-for-pytorch-deepspeed --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" &&\
    python -m pip cache purge

CMD ["milabench", "run"]
Loading