ci: run package test in docker (#345)
guocuimi authored Oct 15, 2024
1 parent 785b3b7 commit 8e0320b
Showing 5 changed files with 75 additions and 104 deletions.
49 changes: 12 additions & 37 deletions .github/workflows/package_test.yml
@@ -43,7 +43,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.10"]
python: ["3.12"]
cuda: ["12.4"]
torch: ["2.4.1"]
runs-on: [self-hosted, linux, build]
@@ -86,40 +86,15 @@ jobs:

- name: Install the package and run pytest
timeout-minutes: 10
- shell: bash -l {0}
run: |
- # define the environment name based on the python and torch versions
- ENV_NAME=pkg_test_py${PYTHON_VERSION//./}_cu${CUDA_VERSION//./}_torch${TORCH_VERSION//./}
- # Initialize conda environment
- source ~/anaconda3/etc/profile.d/conda.sh
- conda init --all && source ~/.bashrc
- # Create a new conda environment
- if conda info --envs | grep -q ${ENV_NAME}; then
- echo "${ENV_NAME} already exists"
- else
- conda create -y -n ${ENV_NAME} python=${PYTHON_VERSION};
- fi
- # Activate the conda environment
- conda activate ${ENV_NAME}
- # Install PyTorch
- pip install torch==${TORCH_VERSION} -i "https://download.pytorch.org/whl/cu${CUDA_VERSION//./}"
- # Uninstall previous version of the package and install the new one
- pip uninstall -y scalellm
- pip install dist/*.whl
- # Install the test requirements
- pip install -r requirements-test.txt
- # Run pytest
- printf "\n\nRunning pytest\n\n"
- cd tests
- python3 -m pytest || exit 1
- printf "\n\n"
- # Deactivate the conda environment
- conda deactivate || true
+ docker pull pytorch/manylinux-builder:cuda${CUDA_VERSION}
+ docker run --rm -t --gpus=all \
+ -v "$CI_CACHE_DIR":/ci_cache \
+ -v "$GITHUB_WORKSPACE":/ScaleLLM \
+ -e PYTHON_VERSION=${PYTHON_VERSION} \
+ -e CUDA_VERSION=${CUDA_VERSION} \
+ -e TORCH_VERSION=${TORCH_VERSION} \
+ -e PIP_CACHE_DIR=/ci_cache/.pip \
+ -u $(id -u):$(id -g) \
+ pytorch/manylinux-builder:cuda${CUDA_VERSION} \
+ bash /ScaleLLM/scripts/run_pytest.sh
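
Note: a minimal sketch of reproducing this containerized test run locally, assuming the matrix values above (Python 3.12, CUDA 12.4, Torch 2.4.1) and the repository checked out in the current directory; the CI-only pip cache mount and user mapping are omitted:

    # pull the same manylinux builder image the workflow uses
    docker pull pytorch/manylinux-builder:cuda12.4
    # mount the checkout at /ScaleLLM and run the same test script the workflow calls
    docker run --rm -t --gpus=all \
      -v "$PWD":/ScaleLLM \
      -e PYTHON_VERSION=3.12 \
      -e CUDA_VERSION=12.4 \
      -e TORCH_VERSION=2.4.1 \
      pytorch/manylinux-builder:cuda12.4 \
      bash /ScaleLLM/scripts/run_pytest.sh
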
38 changes: 9 additions & 29 deletions .github/workflows/publish_manylinux_image.yml
@@ -7,7 +7,13 @@ env:

jobs:
publish_base:
+ strategy:
+ fail-fast: false
+ matrix:
+ cuda: ["11.8", "12.1", "12.4"]
runs-on: [self-hosted, linux, release]
+ env:
+ CUDA_VERSION: ${{ matrix.cuda }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -27,7 +33,7 @@ jobs:
- name: Create cache directory
run: mkdir -p $CI_CACHE_DIR/.buildx-cache

- - name: Build base for cuda 12.4
+ - name: Build base for cuda ${{ matrix.cuda }}
uses: docker/build-push-action@v5
with:
context: ./docker
@@ -36,33 +42,7 @@
cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
build-args: |
- CUDA_VERSION=12.4
+ CUDA_VERSION=${CUDA_VERSION}
tags: |
- vectorchai/scalellm_manylinux:cuda12.4
- - name: Build base for cuda 12.1
- uses: docker/build-push-action@v5
- with:
- context: ./docker
- file: ./docker/Dockerfile.manylinux
- push: true
- cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
- cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
- build-args: |
- CUDA_VERSION=12.1
- tags: |
- vectorchai/scalellm_manylinux:cuda12.1
- - name: Build base for cuda 11.8
- uses: docker/build-push-action@v5
- with:
- context: ./docker
- file: ./docker/Dockerfile.manylinux
- push: true
- cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
- cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
- build-args: |
- CUDA_VERSION=11.8
- tags: |
- vectorchai/scalellm_manylinux:cuda11.8
+ vectorchai/scalellm_manylinux:cuda${CUDA_VERSION}
51 changes: 13 additions & 38 deletions .github/workflows/release_test.yml
Expand Up @@ -13,8 +13,8 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.10"]
cuda: ["12.4"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda: ["12.1"]
torch: ["2.4.1"]
runs-on: [self-hosted, linux, release]
env:
@@ -56,40 +56,15 @@ jobs:

- name: Install the package and run pytest
timeout-minutes: 10
- shell: bash -l {0}
run: |
- # define the environment name based on the python and torch versions
- ENV_NAME=pkg_test_py${PYTHON_VERSION//./}_cu${CUDA_VERSION//./}_torch${TORCH_VERSION//./}
- # Initialize conda environment
- source ~/anaconda3/etc/profile.d/conda.sh
- conda init --all && source ~/.bashrc
- # Create a new conda environment
- if conda info --envs | grep -q ${ENV_NAME}; then
- echo "${ENV_NAME} already exists"
- else
- conda create -y -n ${ENV_NAME} python=${PYTHON_VERSION};
- fi
- # Activate the conda environment
- conda activate ${ENV_NAME}
- # Install PyTorch
- pip install torch==${TORCH_VERSION} -i "https://download.pytorch.org/whl/cu${CUDA_VERSION//./}"
- # Uninstall previous version of the package and install the new one
- pip uninstall -y scalellm
- pip install dist/*.whl
- # Install the test requirements
- pip install -r requirements-test.txt
- # Run pytest
- printf "\n\nRunning pytest\n\n"
- cd tests
- python3 -m pytest || exit 1
- printf "\n\n"
- # Deactivate the conda environment
- conda deactivate || true
+ docker pull pytorch/manylinux-builder:cuda${CUDA_VERSION}
+ docker run --rm -t --gpus=all \
+ -v "$CI_CACHE_DIR":/ci_cache \
+ -v "$GITHUB_WORKSPACE":/ScaleLLM \
+ -e PYTHON_VERSION=${PYTHON_VERSION} \
+ -e CUDA_VERSION=${CUDA_VERSION} \
+ -e TORCH_VERSION=${TORCH_VERSION} \
+ -e PIP_CACHE_DIR=/ci_cache/.pip \
+ -u $(id -u):$(id -g) \
+ pytorch/manylinux-builder:cuda${CUDA_VERSION} \
+ bash /ScaleLLM/scripts/run_pytest.sh
3 changes: 3 additions & 0 deletions .gitignore
@@ -13,6 +13,9 @@
# deps
/.deps

+ # pip cache
+ /.pip

# libtorch
/libtorch

38 changes: 38 additions & 0 deletions scripts/run_pytest.sh
@@ -0,0 +1,38 @@
#!/bin/bash
set -e

ensure_env() {
local var_name="$1"
if [ -z "${!var_name}" ]; then
echo "Error: Environment variable '$var_name' is not set."
exit 1
fi
}

ensure_env PYTHON_VERSION
ensure_env TORCH_VERSION
ensure_env CUDA_VERSION

PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$PROJECT_ROOT"

export HOME=/tmp/home
mkdir -p $HOME
export PATH="$HOME/.local/bin:$PATH"

# choose python version
PYVER="${PYTHON_VERSION//./}"
export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"

# install PyTorch
pip install torch==$TORCH_VERSION -i "https://download.pytorch.org/whl/cu${CUDA_VERSION//./}"

# install dependencies
pip install -r requirements-test.txt

# install scalellm wheel
pip install dist/*.whl

# run pytest
cd tests
pytest
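
As a worked example of the expansions above, assuming the versions used in package_test.yml (PYTHON_VERSION=3.12, CUDA_VERSION=12.4, TORCH_VERSION=2.4.1), the script roughly amounts to:

    PYVER=312                                        # "3.12" with the dot stripped
    export PATH="/opt/python/cp312-cp312/bin:$PATH"  # manylinux CPython 3.12 interpreter
    pip install torch==2.4.1 -i "https://download.pytorch.org/whl/cu124"
    pip install -r requirements-test.txt
    pip install dist/*.whl
    cd tests && pytest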
