Skip to content

Try to bypass HPU hangs #1290

Try to bypass HPU hangs

Try to bypass HPU hangs #1290

Workflow file for this run

name: tests

# Events that start this workflow.
on:
  # Runs every Sunday at 00:00 (cron schedule).
  schedule:
    - cron: '0 0 * * SUN'

  # Runs for pull requests targeting master.
  pull_request:
    branches:
      - master

  # Runs when a release is published.
  release:
    types: [published]

  # Allow manual triggers.
  workflow_dispatch:

jobs:
  # Installs milabench, runs the unit tests, then installs, prepares and runs
  # the CI benchmark configuration on a self-hosted runner, once per matrix
  # entry.
  tests:
    strategy:
      # Do not cancel the remaining matrix entries when one of them fails.
      fail-fast: false
      matrix:
        include:
          - arch: cuda
            exclude: "unsupported-cuda"
          # - arch: rocm
          #   exclude: "unsupported-rocm"

    runs-on: [self-hosted, "${{ matrix.arch }}"]

    # Cancel previous jobs if a new version was pushed
    concurrency:
      group: "${{ github.ref }}-${{ matrix.arch }}"
      cancel-in-progress: true

    defaults:
      run:
        # Login shell (-l) with exit-on-error (-e) for every `run` step.
        shell: bash -el {0}

    # Exported to every step of this job.
    env:
      MILABENCH_CONFIG: "config/ci.yaml"
      MILABENCH_BASE: "output"
      MILABENCH_ARGS: ""
      MILABENCH_GPU_ARCH: "${{ matrix.arch }}"
      MILABENCH_DASH: "no"
      MILABENCH_EXCLUDE: "${{ matrix.exclude }}"

    steps:
      - uses: actions/checkout@v3

      - uses: conda-incubator/setup-miniconda@v2
        with:
          auto-activate-base: false
          # Quoted on purpose: an unquoted 3.10 is a YAML float and becomes 3.1.
          python-version: "3.10"
          miniconda-version: "latest"
          activate-environment: test

      - name: Pytorch Sanity
        run: |
          # Only ROCm runners get the extra diagnostics.
          if [[ "${MILABENCH_GPU_ARCH}" == "rocm" ]]; then
            groups
            /opt/rocm/bin/rocminfo
          fi

      - name: dependencies
        run: |
          # "${HOME}" is used instead of "~": a tilde inside double quotes is
          # not expanded, so the directory test would never match.
          if [[ ! -d "${HOME}/.cargo/bin" ]]; then
            # NOTE(review): --no-check-certificate disables TLS certificate
            # verification for a script that is piped straight into sh.
            # Confirm the runner really needs it.
            wget --no-check-certificate --secure-protocol=TLSv1_2 -qO- https://sh.rustup.rs | sh -s -- -y
          fi
          export PATH="${HOME}/.cargo/bin:${PATH}"
          python -m pip install -U pip
          python -m pip install -U poetry
          poetry lock --no-update
          # poetry v1.7 has a bug where it can't find pip during the first
          # install attempt:
          #   Output:
          #     [...]/.venv/bin/python: can't open file
          #     '[...]/lib/python3.9/site-packages/virtualenv/seed/wheels/embed/pip-23.3.1-py3-none-any.whl/pip':
          #     [Errno 2] No such file or directory
          # The leading `!` keeps `bash -e` from aborting the step whatever
          # the first attempt returns; the second attempt is the one that
          # must succeed.
          ! poetry install
          poetry install

      - name: pin
        run: |
          # Pin for every architecture, whatever the matrix entry is, then
          # print a summary of the files the pinning changed.
          MILABENCH_GPU_ARCH=cuda poetry run milabench pin -c constraints/cuda.txt --config config/standard.yaml
          MILABENCH_GPU_ARCH=rocm poetry run milabench pin -c constraints/rocm.txt --config config/standard.yaml
          MILABENCH_GPU_ARCH=xpu poetry run milabench pin -c constraints/xpu.txt --config config/standard.yaml
          git diff --stat

      - name: tests
        run: |
          export PATH="/opt/rocm/bin:$PATH"
          pytest --ignore=tests/integration tests/

      - name: install benchmarks
        run: |
          milabench install --exclude "${MILABENCH_EXCLUDE}"

      - name: prepare benchmarks
        run: |
          milabench prepare --exclude "${MILABENCH_EXCLUDE}"

      - name: run benchmarks
        run: |
          export PATH="/opt/rocm/bin:$PATH"
          milabench run --validations all --exclude "${MILABENCH_EXCLUDE}"

      - name: Summary
        run: |
          milabench summary "${MILABENCH_BASE}/runs/"