# build #130
# Note: this file was flagged as containing bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the
# file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow that pre-builds the FlexFlow third-party dependencies.
name: "build"

# Events that start the workflow.
on:
  schedule:
    # Weekly, every Saturday at 08:00 UTC. GitHub evaluates cron in UTC, so this
    # is midnight Pacific / 3am Eastern only while standard time is in effect.
    - cron: "0 8 * * 6"
  # Every push to the default branch.
  push:
    branches: ["master"]
  # Every pull request (no branch filter).
  pull_request:
  # Manual runs from the Actions tab.
  workflow_dispatch:

# Runs for the same pull-request branch share one group, so a newer run cancels
# the one still in progress. Outside of pull requests `github.head_ref` is empty
# and the unique run id is used instead, so those runs never cancel each other.
concurrency:
  group: build-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Builds Legion (and NCCL for the CUDA backend) for every OS / CUDA version /
  # GPU backend combination left in the matrix, then uploads the resulting
  # tarballs as workflow artifacts.
  build-dependencies:
    name: Build the FlexFlow dependencies
    strategy:
      matrix:
        # NOTE(review): GitHub has retired the hosted ubuntu-18.04 image; confirm
        # these entries still resolve to a runner (self-hosted or otherwise).
        os: [ubuntu-18.04, ubuntu-20.04]
        cuda_version:
          [
            "10.1.243",
            "10.2.89",
            "11.0.3",
            "11.1.1",
            "11.2.2",
            "11.3.1",
            "11.4.3",
            "11.5.2",
            "11.6.2",
            "11.7.0",
          ]
        gpu_backend: [cuda, hip_rocm]
        # Uncomment the line below (and related ones) to build nccl, legion in parallel. Because
        # GitHub only supports up to 20 jobs in parallel, building in parallel is currently not needed.
        # dependency: ["nccl", "legion"]
        exclude:
          # CUDA 10.x is only built on Ubuntu 18.04.
          - os: ubuntu-20.04
            cuda_version: "10.1.243"
          - os: ubuntu-20.04
            cuda_version: "10.2.89"
          # Only pre-compile hip_rocm on Ubuntu 20.04 with one single version of CUDA
          # (although CUDA is not necessary). After the exclusions below, the only
          # hip_rocm combination left is ubuntu-20.04 with CUDA 11.1.1.
          - os: ubuntu-18.04
            gpu_backend: "hip_rocm"
          - cuda_version: "10.1.243"
            gpu_backend: "hip_rocm"
          - cuda_version: "10.2.89"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.0.3"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.2.2"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.3.1"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.4.3"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.5.2"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.6.2"
            gpu_backend: "hip_rocm"
          - cuda_version: "11.7.0"
            gpu_backend: "hip_rocm"
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Install CUDA
        uses: Jimver/cuda-toolkit@v0.2.8
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda_version }}
          use-github-cache: "false"

      - name: Install Build Dependencies
        env:
          CUDA_VERSION: ${{ matrix.cuda_version }}
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
        run: .github/workflows/helpers/install_dependencies.sh

      - name: Build NCCL/Legion
        env:
          # DEPENDENCY: ${{ matrix.dependency }}
          CUDA_VERSION: ${{ matrix.cuda_version }}
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
        run: |
          export PATH=/opt/conda/bin:$PATH
          export CUDNN_DIR=/usr/local/cuda
          export CUDA_DIR=/usr/local/cuda
          export FF_BUILD_LEGION=ON
          # NCCL is only enabled for the CUDA backend
          if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
            export FF_USE_NCCL=ON
          fi
          cores_available=$(nproc --all)
          # Leave one core free, but never hand make a job count below 1
          # (`make -j 0` is rejected).
          n_build_cores=$(( cores_available > 1 ? cores_available - 1 : 1 ))
          mkdir -p build
          cd build
          ../config/config.linux
          make -j "$n_build_cores"
          # NOTE(review): the configure script is run a second time before
          # installing; kept as in the original -- confirm it is required.
          ../config/config.linux
          make install

      - name: Prepare library files
        env:
          FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
        run: |
          # Remove unnecessary files
          echo "Removing unnecessary files..."
          rm -rf build/deps/nccl/obj build/deps/nccl/src build/deps/nccl/tmp
          rm -f build/export/legion/lib/libflexflow.so
          if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
            # CUDA tarballs are keyed by OS and CUDA version
            export NCCL_TARBALL="nccl_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz"
            export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz"
            # Only build NCCL tarball for CUDA backends
            echo "Creating archive $NCCL_TARBALL"
            tar -zcvf "$NCCL_TARBALL" build/deps/nccl/
            echo "Checking the size of the NCCL tarball..."
            du -h "$NCCL_TARBALL"
          else
            # Non-CUDA tarballs are keyed by OS and GPU backend
            export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz"
          fi
          echo "Creating archive $LEGION_TARBALL"
          tar -zcvf "$LEGION_TARBALL" build/export/legion/
          echo "Checking the size of the Legion tarball..."
          du -h "$LEGION_TARBALL"

      # Artifact names mirror the tarball names produced in the previous step.
      - name: Archive compiled Legion library (CUDA)
        uses: actions/upload-artifact@v3
        if: ${{ matrix.gpu_backend == 'cuda' }}
        with:
          name: legion_${{ matrix.os }}_${{ matrix.cuda_version }}
          path: legion_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz

      - name: Archive compiled Legion library (HIP)
        uses: actions/upload-artifact@v3
        if: ${{ matrix.gpu_backend != 'cuda' }}
        with:
          name: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}
          path: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz

      - name: Archive compiled NCCL library (CUDA)
        uses: actions/upload-artifact@v3
        if: ${{ matrix.gpu_backend == 'cuda' }}
        with:
          name: nccl_${{ matrix.os }}_${{ matrix.cuda_version }}
          path: nccl_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz

  # Posts a Slack message when the scheduled (weekly) build fails.
  # NOTE(review): this job only depends on build-dependencies, so a failure in
  # create-release does not trigger a notification -- confirm that is intended.
  notify-slack:
    name: Notify Slack in case of failure
    runs-on: ubuntu-20.04
    needs: build-dependencies
    if: ${{ failure() && github.event_name == 'schedule' }}
    steps:
      - name: Send Slack message
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        run: |
          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly build of FlexFlow dependencies failed! <https://github.com/flexflow/flexflow-third-party/actions/runs/$GITHUB_RUN_ID|(See here).> :x: \"}" "$SLACK_WEBHOOK"

  # Collects every tarball uploaded by build-dependencies and publishes them as
  # a new GitHub release tagged with the current date and time.
  create-release:
    name: Create new release
    runs-on: ubuntu-20.04
    needs: build-dependencies
    # Never publish a release from a pull-request run: the binaries would come
    # from unmerged code. The implicit success() check on `needs` still applies.
    if: ${{ github.event_name != 'pull_request' }}
    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3

      - name: Free additional space on runner
        run: .github/workflows/helpers/free_space_on_runner.sh

      - name: Create folder for artifacts
        run: mkdir artifacts unwrapped_artifacts

      # With no artifact name given, all artifacts of this run are downloaded,
      # each into its own sub-directory of ./artifacts.
      - name: Download artifacts
        uses: actions/download-artifact@v3
        with:
          path: ./artifacts

      - name: Display structure of downloaded files
        working-directory: ./artifacts
        run: ls -R

      # Move each <artifact>/<file>.tar.gz into one flat directory.
      - name: Unwrap all artifacts
        working-directory: ./artifacts
        run: find . -maxdepth 2 -mindepth 2 -type f -name "*.tar.gz" -exec mv {} ../unwrapped_artifacts/ \;

      - name: Get datetime
        run: echo "RELEASE_DATETIME=$(date '+%Y-%m-%dT%H-%M-%S')" >> "$GITHUB_ENV"

      - name: Release
        env:
          NAME: ${{ env.RELEASE_DATETIME }}
          TAG_NAME: ${{ env.RELEASE_DATETIME }}
          GITHUB_TOKEN: ${{ secrets.FLEXFLOW_TOKEN }}
        # NAME was previously set but unused; pass it explicitly as the title.
        run: gh release create "$TAG_NAME" --title "$NAME" ./unwrapped_artifacts/*.tar.gz