# Skip to content

# build

# build #130

# Workflow file for this run

# Workflow header: display name, the events that trigger a run, and the
# concurrency policy applied to overlapping runs.
name: "build"
on:
  schedule:
    # Run weekly on Saturday at midnight PT (3am ET / 8am UTC)
    - cron: "0 8 * * 6"
  push:
    branches:
      - "master"
  pull_request:
  workflow_dispatch:
concurrency:
  # The group key is the PR head branch when `github.head_ref` is non-empty,
  # otherwise the run id. Runs that share a group key cancel the one that is
  # still in progress.
  group: build-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Builds Legion (and NCCL, for the CUDA backend) for every remaining
  # os / cuda_version / gpu_backend combination and uploads the results as
  # tarball artifacts.
  build-dependencies:
    name: Build the FlexFlow dependencies
    strategy:
      matrix:
        os: [ubuntu-18.04, ubuntu-20.04]
        cuda_version:
          - "10.1.243"
          - "10.2.89"
          - "11.0.3"
          - "11.1.1"
          - "11.2.2"
          - "11.3.1"
          - "11.4.3"
          - "11.5.2"
          - "11.6.2"
          - "11.7.0"
        gpu_backend: [cuda, hip_rocm]
        # Uncomment the line below (and related ones) to build nccl, legion
        # in parallel. Because GitHub only supports up to 20 jobs in
        # parallel, building in parallel is currently not needed.
        # dependency: ["nccl", "legion"]
        exclude:
          # CUDA 10.x is not built on Ubuntu 20.04.
          - os: ubuntu-20.04
            cuda_version: "10.1.243"
          - os: ubuntu-20.04
            cuda_version: "10.2.89"
          # Only pre-compile hip_rocm on Ubuntu 20.04 with one single version
          # of CUDA (although CUDA is not necessary). After the exclusions
          # below, the only hip_rocm combination left is
          # ubuntu-20.04 + CUDA 11.1.1.
          - os: ubuntu-18.04
            gpu_backend: "hip_rocm"
          - cuda_version: "10.1.243"
            gpu_backend: "hip_rocm"
          - cuda_version: "10.2.89"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.0.3"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.2.2"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.3.1"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.4.3"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.5.2"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.6.2"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.7.0"
            gpu_backend: "hip_rocm"
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Install CUDA
        uses: Jimver/cuda-toolkit@v0.2.8
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda_version }}
          use-github-cache: "false"

      - name: Install Build Dependencies
        env:
          CUDA_VERSION: ${{ matrix.cuda_version }}
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
        run: .github/workflows/helpers/install_dependencies.sh

      - name: Build NCCL/Legion
        env:
          # DEPENDENCY: ${{ matrix.dependency }}
          CUDA_VERSION: ${{ matrix.cuda_version }}
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
        run: |
          export PATH=/opt/conda/bin:$PATH
          export CUDNN_DIR=/usr/local/cuda
          export CUDA_DIR=/usr/local/cuda
          export FF_BUILD_LEGION=ON
          # NCCL is only requested for the CUDA backend.
          if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
            export FF_USE_NCCL=ON
          fi
          # Leave one core free, but never pass fewer than one job to make
          # (a single-core machine would otherwise yield `make -j 0`).
          cores_available=$(nproc --all)
          n_build_cores=$(( cores_available > 1 ? cores_available - 1 : 1 ))
          mkdir build
          cd build
          ../config/config.linux
          make -j "$n_build_cores"
          # NOTE(review): the configure script is invoked a second time before
          # `make install`; kept as in the original -- confirm it is required.
          ../config/config.linux
          make install

      - name: Prepare library files
        env:
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
        run: |
          # Remove unnecessary files
          echo "Removing unnecessary files..."
          rm -rf build/deps/nccl/obj build/deps/nccl/src build/deps/nccl/tmp
          rm -f build/export/legion/lib/libflexflow.so
          # CUDA tarballs are keyed by os + CUDA version; tarballs for any
          # other backend are keyed by os + backend name.
          if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
            export NCCL_TARBALL="nccl_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz"
            export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz"
            # Only build NCCL tarball for CUDA backends
            echo "Creating archive $NCCL_TARBALL"
            tar -zcvf "$NCCL_TARBALL" build/deps/nccl/
            echo "Checking the size of the NCCL tarball..."
            du -h "$NCCL_TARBALL"
          else
            export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz"
          fi
          echo "Creating archive $LEGION_TARBALL"
          tar -zcvf "$LEGION_TARBALL" build/export/legion/
          echo "Checking the size of the Legion tarball..."
          du -h "$LEGION_TARBALL"

      # The artifact names below must match the tarball names produced by the
      # "Prepare library files" step.
      - name: Archive compiled Legion library (CUDA)
        uses: actions/upload-artifact@v3
        if: ${{ matrix.gpu_backend == 'cuda' }}
        with:
          name: legion_${{ matrix.os }}_${{ matrix.cuda_version }}
          path: legion_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz

      - name: Archive compiled Legion library (HIP)
        uses: actions/upload-artifact@v3
        if: ${{ matrix.gpu_backend != 'cuda' }}
        with:
          name: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}
          path: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz

      - name: Archive compiled NCCL library (CUDA)
        uses: actions/upload-artifact@v3
        if: ${{ matrix.gpu_backend == 'cuda' }}
        with:
          name: nccl_${{ matrix.os }}_${{ matrix.cuda_version }}
          path: nccl_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz

  # Posts a message to the Slack webhook when the scheduled (weekly) build
  # fails; failures of push / pull_request / manual runs are not reported.
  notify-slack:
    name: Notify Slack in case of failure
    runs-on: ubuntu-20.04
    needs: build-dependencies
    if: ${{ failure() && github.event_name == 'schedule' }}
    steps:
      - name: Send Slack message
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        run: |
          curl -X POST -H 'Content-type: application/json' \
            --data "{\"text\":\"Weekly build of FlexFlow dependencies failed! <https://github.com/flexflow/flexflow-third-party/actions/runs/$GITHUB_RUN_ID|(See here).> :x: \"}" \
            "$SLACK_WEBHOOK"

  # Collects every tarball uploaded by build-dependencies and publishes them
  # as the assets of a new release tagged with the current date and time.
  create-release:
    name: Create new release
    runs-on: ubuntu-20.04
    needs: build-dependencies
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Create folder for artifacts
        run: mkdir artifacts unwrapped_artifacts

      - name: Download artifacts
        uses: actions/download-artifact@v3
        with:
          path: ./artifacts

      - name: Display structure of downloaded files
        working-directory: ./artifacts
        run: ls -R

      # Move every tarball found exactly one directory below ./artifacts into
      # a single flat folder so one glob can pick them all up.
      - name: Unwrap all artifacts
        working-directory: ./artifacts
        run: |
          find . -maxdepth 2 -mindepth 2 -type f -name "*.tar.gz" -exec mv {} ../unwrapped_artifacts/ \;

      # Export the timestamp through GITHUB_ENV so the Release step can read
      # it as env.RELEASE_DATETIME.
      - name: Get datetime
        run: echo "RELEASE_DATETIME=$(date '+%Y-%m-%dT%H-%M-%S')" >> "$GITHUB_ENV"

      - name: Release
        env:
          NAME: ${{ env.RELEASE_DATETIME }}
          TAG_NAME: ${{ env.RELEASE_DATETIME }}
          GITHUB_TOKEN: ${{ secrets.FLEXFLOW_TOKEN }}
        run: |
          gh release create "$TAG_NAME" --title "$NAME" ./unwrapped_artifacts/*.tar.gz