Skip to content

Commit

Permalink
Add nightly rosetta build and test
Browse files Browse the repository at this point in the history
  • Loading branch information
DwarKapex committed Nov 16, 2023
1 parent ccafb52 commit 6974a3a
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 215 deletions.
172 changes: 88 additions & 84 deletions .github/workflows/_build_rosetta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ name: ~build Rosetta container
on:
workflow_call:
inputs:
ARCHITECTURE:
type: string
description: 'CPU architecture to build the image for, e.g. amd64, arm64'
required: true
BASE_LIBRARY:
type: string
description: 'Choice of base library to build on:'
Expand All @@ -14,18 +18,21 @@ on:
required: false
BUILD_DATE:
type: string
description: "Build date in YYYY-MM-DD format"
description: 'Build date in YYYY-MM-DD format'
required: false
default: 'NOT SPECIFIED'
PLATFORMS:
BADGE_FILENAME:
type: string
description: 'JSON list of platforms. Ex: ["amd64"]'
description: 'Name of the endpoint JSON file for shields.io badge'
required: false
default: '["arm64", "amd64"]'
default: 'badge-rosetta-build'
outputs:
DOCKER_TAGS:
description: "Tags of the image built"
value: ${{ jobs.merge.outputs.DOCKER_TAGS }}
# Tag of the intermediate "mealkit" image, re-exported from the build-rosetta job.
# The expression opener must be `${{` (no space) and job results live in the `outputs` context.
DOCKER_TAG_MEALKIT:
description: 'Tags of the mealkit image build'
value: ${{ jobs.build-rosetta.outputs.DOCKER_TAG_MEALKIT }}
DOCKER_TAG_FINAL:
description: "Tags of the complete image built"
value: ${{ jobs.build-rosetta.outputs.DOCKER_TAG_FINAL }}

env:
UPLD_IMAGE: ghcr.io/nvidia/jax-toolbox-internal
Expand All @@ -38,13 +45,17 @@ permissions:

jobs:

build:
strategy:
fail-fast: false
matrix:
PLATFORM: ${{ fromJSON(inputs.PLATFORMS) }}
runs-on: [self-hosted, "${{ matrix.PLATFORM }}", small]
# Builds the Rosetta images for a single CPU architecture on a matching self-hosted runner.
build-rosetta:
runs-on: [self-hosted, "${{ inputs.ARCHITECTURE }}", small]
env:
# Per-architecture file name of the shields.io endpoint JSON written by the sitrep step.
BADGE_FILENAME_FULL: ${{ inputs.BADGE_FILENAME }}-${{ inputs.ARCHITECTURE }}.json
# Job results must be declared under `outputs` to be readable as jobs.build-rosetta.outputs.*
outputs:
DOCKER_TAG_MEALKIT: ${{ steps.mealkit-metadata.outputs.tags }}
DOCKER_TAG_FINAL: ${{ steps.final-metadata.outputs.tags }}
steps:
- name: Print environment variables
run: env

- name: Set default BASE_IMAGE
id: defaults
run: |
Expand All @@ -54,9 +65,6 @@ jobs:
echo "BASE_IMAGE=${{ env.DOCKER_REGISTRY }}/upstream-${{ inputs.BASE_LIBRARY }}:latest" >> "$GITHUB_OUTPUT"
fi
- name: Print environment variables
run: env

- name: Check out the repository under ${GITHUB_WORKSPACE}
uses: actions/checkout@v3

Expand All @@ -67,92 +75,88 @@ jobs:
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Set docker metadata
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.UPLD_IMAGE }}
flavor: latest=false
tags: type=raw,value=${{ github.run_id }}-${{ inputs.BASE_LIBRARY }}-${{ matrix.PLATFORM }}
labels: org.opencontainers.image.created=${{ inputs.BUILD_DATE }}

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
driver-opts: |
image=moby/buildkit:v0.12.1
- name: Build docker images
# Computes the tag and labels for the mealkit image:
# <run id>-<base library>-<architecture>-mealkit
- name: Set docker metadata - mealkit
id: mealkit-metadata
uses: docker/metadata-action@v4
with:
images: ${{ env.UPLD_IMAGE }}
flavor: latest=false
tags: type=raw,value=${{ github.run_id }}-${{ inputs.BASE_LIBRARY }}-${{ inputs.ARCHITECTURE }}-mealkit
labels: org.opencontainers.image.created=${{ inputs.BUILD_DATE }}

- name: Build docker images - mealkit
id: mealkit-build
uses: docker/build-push-action@v4
with:
context: rosetta/
push: true
file: rosetta/Dockerfile.${{ inputs.BASE_LIBRARY }}
platforms: linux/${{ matrix.PLATFORM }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
# Build only for the architecture requested through the ARCHITECTURE workflow input.
platforms: linux/${{ inputs.ARCHITECTURE }}
tags: ${{ steps.mealkit-metadata.outputs.tags }}
labels: ${{ steps.mealkit-metadata.outputs.labels }}
target: rosetta
build-args: |
BASE_IMAGE=${{ steps.defaults.outputs.BASE_IMAGE }}
# Temporary workaround until the following issues are solved:
# https://github.com/orgs/community/discussions/17245
# https://github.com/actions/runner/pull/2477
# https://github.com/orgs/community/discussions/26639
- name: Save image name as text file
shell: bash -x -e {0}
run: |
echo "${{ steps.meta.outputs.tags }}" >> image-name.txt
- name: Upload image name file as artifact
uses: actions/upload-artifact@v3
- name: Set docker metadata - final
id: final-metadata
uses: docker/metadata-action@v4
with:
name: image-name-${{ inputs.BASE_LIBRARY }}-${{ matrix.PLATFORM }}
path: image-name.txt

merge:
runs-on: ubuntu-latest
needs: build
outputs:
DOCKER_TAGS: ${{ steps.meta.outputs.tags }}
steps:
# TODO: currently downloading all artifacts of the entire workflow
# Revise when this request is fulfilled:
# https://github.com/actions/download-artifact/issues/214
- name: Download image name files into separate folders
uses: actions/download-artifact@v3
# Tag of the final image: <run id>-<base library>-<architecture>-final
images: ${{ env.UPLD_IMAGE }}
flavor: latest=false
tags: type=raw,value=${{ github.run_id }}-${{ inputs.BASE_LIBRARY }}-${{ inputs.ARCHITECTURE }}-final
labels: org.opencontainers.image.created=${{ inputs.BUILD_DATE }}

- name: Login to GitHub Container Registry
uses: docker/login-action@v2
# The id is required: the sitrep step reads steps.final-build.outcome and
# steps.final-build.outputs.digest to decide the badge status.
- name: Build docker images - final
id: final-build
uses: docker/build-push-action@v4
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
# Build and push the final image for the architecture requested through the ARCHITECTURE input.
context: rosetta/
push: true
file: rosetta/Dockerfile.${{ inputs.BASE_LIBRARY }}
platforms: linux/${{ inputs.ARCHITECTURE }}
tags: ${{ steps.final-metadata.outputs.tags }}
labels: ${{ steps.final-metadata.outputs.labels }}
# NOTE(review): same Dockerfile target as the mealkit build - confirm this is intended.
target: rosetta
build-args: |
BASE_IMAGE=${{ steps.defaults.outputs.BASE_IMAGE }}
- name: Set docker metadata
id: meta
uses: docker/metadata-action@v4
with:
images: |
${{ env.UPLD_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.run_id }}-${{ inputs.BASE_LIBRARY }}-multiarch
labels:
org.opencontainers.image.created=${{ inputs.BUILD_DATE }}

- name: Combine images into a single multi-arch image
- name: Generate sitrep
if: success() || failure()
shell: bash -x -e {0}
run: |
docker manifest create ${{ steps.meta.outputs.tags }} $(
for IMAGE in $(cat image-name-${{ inputs.BASE_LIBRARY }}-*/image-name.txt); do
REPO=$(echo $IMAGE | cut -d: -f1)
DIGEST=$(
docker manifest inspect $IMAGE |\
jq -r '.manifests[] | select(.platform.os == "linux") | .digest'
)
echo $REPO@${DIGEST}
done
)
docker manifest push ${{ steps.meta.outputs.tags }}
# bring in utility functions
source .github/workflows/scripts/to_json.sh
badge_label='JAX ${{ inputs.ARCHITECTURE }} build'
tags="${{ steps.final-metadata.outputs.tags }}"
digest="${{ steps.final-build.outputs.digest }}"
outcome="${{ steps.final-build.outcome }}"
if [[ ${outcome} == "success" ]]; then
badge_message="pass"
badge_color=brightgreen
summary="JAX build on ${{ inputs.ARCHITECTURE }}: $badge_message"
else
badge_message="fail"
badge_color=red
summary="JAX build on ${{ inputs.ARCHITECTURE }}: $badge_message"
fi
to_json \
summary \
badge_label tags digest outcome \
> sitrep.json
schemaVersion=1 \
label="${badge_label}" \
message="${badge_message}" \
color="${badge_color}" \
to_json schemaVersion label message color \
> ${{ env.BADGE_FILENAME_FULL }}
8 changes: 8 additions & 0 deletions .github/workflows/_sandbox.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ jobs:
TE_IMAGE: ${{ needs.build-pax.outputs.DOCKER_TAG_FINAL }}
secrets: inherit

# Builds the Rosetta container on top of the Pax image produced by build-pax.
build-rosetta:
needs: [build-pax]
# Reusable workflows are resolved by path, so the directory must be spelled `workflows`.
uses: ./.github/workflows/_build_rosetta.yaml
with:
ARCHITECTURE: amd64
BASE_LIBRARY: pax
BASE_IMAGE: ${{ needs.build-pax.outputs.DOCKER_TAG_FINAL }}

# test-pax:
# needs: [build-pax]
# uses: ./.github/workflows/_test_pax.yaml
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/_test_pax_rosetta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: ~test Pax, MGMN
name: ~test Pax, multi-node

on:
workflow_call:
Expand All @@ -20,7 +20,7 @@ on:

jobs:

multi-gpu-multi-node-te:
rosetta-pax-multi-node-te:
strategy:
matrix:
PARALLEL_CONFIG:
Expand Down Expand Up @@ -157,7 +157,7 @@ jobs:
name: ${{ steps.meta.outputs.JOB_NAME }}
path: output/*

multi-gpu-multi-node:
rosetta-pax-multi-node:
strategy:
matrix:
PARALLEL_CONFIG:
Expand Down Expand Up @@ -292,7 +292,7 @@ jobs:
path: output/*


multi-gpu-single-node-dropout-te:
rosetta-pax-single-node-dropout-te:
strategy:
matrix:
PARALLEL_CONFIG:
Expand Down Expand Up @@ -428,7 +428,7 @@ jobs:


metrics:
needs: [multi-gpu-multi-node, multi-gpu-multi-node-te, multi-gpu-single-node-dropout-te]
needs: [rosetta-pax-multi-node, rosetta-pax-multi-node-te, rosetta-pax-single-node-dropout-te]
runs-on: ubuntu-22.04

steps:
Expand Down Expand Up @@ -466,7 +466,7 @@ jobs:


publish-test:
needs: [multi-gpu-multi-node, multi-gpu-multi-node-te, multi-gpu-single-node-dropout-te, metrics]
needs: [rosetta-pax-multi-node, rosetta-pax-multi-node-te, rosetta-pax-single-node-dropout-te, metrics]
uses: ./.github/workflows/_publish_badge.yaml
if: ( always() )
secrets: inherit
Expand Down
Loading

0 comments on commit 6974a3a

Please sign in to comment.