From 6974a3adcd73312b2f064faad5d18e68e3484147 Mon Sep 17 00:00:00 2001 From: Vladislav Kozlov Date: Wed, 15 Nov 2023 21:52:41 -0700 Subject: [PATCH] Add nightly rosetta build and test --- .github/workflows/_build_rosetta.yaml | 172 +++++++++--------- .github/workflows/_sandbox.yaml | 8 + .github/workflows/_test_pax_rosetta.yaml | 12 +- ...ml => nightly-rosetta-pax-build-test.yaml} | 137 ++++++++------ .../nightly-rosetta-t5x-build-test.yaml | 139 ++++++++------ .github/workflows/nightly-t5x-build.yaml | 2 +- .github/workflows/nightly-te-test.yaml | 6 +- rosetta/Dockerfile.pax | 2 +- rosetta/Dockerfile.t5x | 2 +- 9 files changed, 265 insertions(+), 215 deletions(-) rename .github/workflows/{nightly-rosetta-pax-build.yaml => nightly-rosetta-pax-build-test.yaml} (54%) diff --git a/.github/workflows/_build_rosetta.yaml b/.github/workflows/_build_rosetta.yaml index e811e31bf..1f6509e8c 100644 --- a/.github/workflows/_build_rosetta.yaml +++ b/.github/workflows/_build_rosetta.yaml @@ -3,6 +3,10 @@ name: ~build Rosetta container on: workflow_call: inputs: + ARCHITECTURE: + type: string + description: 'CPU architecture to build the image for, e.g. amd64, arm64' + required: true BASE_LIBRARY: type: string description: 'Choice of base library to build on:' @@ -14,18 +18,21 @@ on: required: false BUILD_DATE: type: string - description: "Build date in YYYY-MM-DD format" + description: 'Build date in YYYY-MM-DD format' required: false default: 'NOT SPECIFIED' - PLATFORMS: + BADGE_FILENAME: type: string - description: 'JSON list of platforms. 
Ex: ["amd64"]' + description: 'Name of the endpoint JSON file for shields.io badge' required: false - default: '["arm64", "amd64"]' + default: 'badge-rosetta-build' outputs: - DOCKER_TAGS: - description: "Tags of the image built" - value: ${{ jobs.merge.outputs.DOCKER_TAGS }} + DOCKER_TAG_MEALKIT: + description: 'Tags of the mealkit image built' + value: ${{ jobs.build-rosetta.outputs.DOCKER_TAG_MEALKIT }} + DOCKER_TAG_FINAL: + description: "Tags of the complete image built" + value: ${{ jobs.build-rosetta.outputs.DOCKER_TAG_FINAL }} env: UPLD_IMAGE: ghcr.io/nvidia/jax-toolbox-internal @@ -38,13 +45,17 @@ permissions: jobs: - build: - strategy: - fail-fast: false - matrix: - PLATFORM: ${{ fromJSON(inputs.PLATFORMS) }} - runs-on: [self-hosted, "${{ matrix.PLATFORM }}", small] + build-rosetta: + runs-on: [self-hosted, "${{ inputs.ARCHITECTURE }}", small] + env: + BADGE_FILENAME_FULL: ${{ inputs.BADGE_FILENAME}}-${{ inputs.ARCHITECTURE}}.json + outputs: + DOCKER_TAG_MEALKIT: ${{ steps.mealkit-metadata.outputs.tags }} + DOCKER_TAG_FINAL: ${{ steps.final-metadata.outputs.tags }} steps: + - name: Print environment variables + run: env + - name: Set default BASE_IMAGE id: defaults run: | @@ -54,9 +65,6 @@ jobs: echo "BASE_IMAGE=${{ env.DOCKER_REGISTRY }}/upstream-${{ inputs.BASE_LIBRARY }}:latest" >> "$GITHUB_OUTPUT" fi - - name: Print environment variables - run: env - - name: Check out the repository under ${GITHUB_WORKSPACE} uses: actions/checkout@v3 @@ -67,92 +75,88 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Set docker metadata - id: meta - uses: docker/metadata-action@v4 - with: - images: ${{ env.UPLD_IMAGE }} - flavor: latest=false - tags: type=raw,value=${{ github.run_id }}-${{ inputs.BASE_LIBRARY }}-${{ matrix.PLATFORM }} - labels: org.opencontainers.image.created=${{ inputs.BUILD_DATE }} - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 with: driver-opts: | image=moby/buildkit:v0.12.1 - - name: 
Build docker images + - name: Set docker metadata - mealkit + id: mealkit-metadata + uses: docker/metadata-action@v4 + with: + images: ${{ env.UPLD_IMAGE }} + flavor: latest=false + tags: type=raw,value=${{ github.run_id }}-${{ inputs.BASE_LIBRARY }}-${{ inputs.ARCHITECTURE }}-mealkit + labels: org.opencontainers.image.created=${{ inputs.BUILD_DATE }} + + - name: Build docker images - mealkit + id: mealkit-build uses: docker/build-push-action@v4 with: context: rosetta/ push: true file: rosetta/Dockerfile.${{ inputs.BASE_LIBRARY }} - platforms: linux/${{ matrix.PLATFORM }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} + platforms: linux/${{ inputs.ARCHITECTURE }} + tags: ${{ steps.mealkit-metadata.outputs.tags }} + labels: ${{ steps.mealkit-metadata.outputs.labels }} target: rosetta build-args: | BASE_IMAGE=${{ steps.defaults.outputs.BASE_IMAGE }} - # Temporary workaround until the following issues are solved: - # https://github.com/orgs/community/discussions/17245 - # https://github.com/actions/runner/pull/2477 - # https://github.com/orgs/community/discussions/26639 - - name: Save image name as text file - shell: bash -x -e {0} - run: | - echo "${{ steps.meta.outputs.tags }}" >> image-name.txt - - - name: Upload image name file as artifact - uses: actions/upload-artifact@v3 + - name: Set docker metadata - final + id: final-metadata + uses: docker/metadata-action@v4 with: - name: image-name-${{ inputs.BASE_LIBRARY }}-${{ matrix.PLATFORM }} - path: image-name.txt - - merge: - runs-on: ubuntu-latest - needs: build - outputs: - DOCKER_TAGS: ${{ steps.meta.outputs.tags }} - steps: - # TODO: currently downloading all artifacts of the entire workflow - # Revise when this request is fulfilled: - # https://github.com/actions/download-artifact/issues/214 - - name: Download image name files into separate folders - uses: actions/download-artifact@v3 + images: ${{ env.UPLD_IMAGE }} + flavor: latest=false + tags: type=raw,value=${{ 
github.run_id }}-${{ inputs.BASE_LIBRARY }}-${{ inputs.ARCHITECTURE }}-final + labels: org.opencontainers.image.created=${{ inputs.BUILD_DATE }} - - name: Login to GitHub Container Registry - uses: docker/login-action@v2 + - name: Build docker images - final + uses: docker/build-push-action@v4 with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} + context: rosetta/ + push: true + file: rosetta/Dockerfile.${{ inputs.BASE_LIBRARY }} + platforms: linux/${{ inputs.ARCHITECTURE }} + tags: ${{ steps.final-metadata.outputs.tags }} + labels: ${{ steps.final-metadata.outputs.labels }} + target: rosetta + build-args: | + BASE_IMAGE=${{ steps.defaults.outputs.BASE_IMAGE }} - - name: Set docker metadata - id: meta - uses: docker/metadata-action@v4 - with: - images: | - ${{ env.UPLD_IMAGE }} - flavor: | - latest=false - tags: | - type=raw,value=${{ github.run_id }}-${{ inputs.BASE_LIBRARY }}-multiarch - labels: - org.opencontainers.image.created=${{ inputs.BUILD_DATE }} - - - name: Combine images into a single multi-arch image + + - name: Generate sitrep + if: success() || failure() shell: bash -x -e {0} run: | - docker manifest create ${{ steps.meta.outputs.tags }} $( - for IMAGE in $(cat image-name-${{ inputs.BASE_LIBRARY }}-*/image-name.txt); do - REPO=$(echo $IMAGE | cut -d: -f1) - DIGEST=$( - docker manifest inspect $IMAGE |\ - jq -r '.manifests[] | select(.platform.os == "linux") | .digest' - ) - echo $REPO@${DIGEST} - done - ) - docker manifest push ${{ steps.meta.outputs.tags }} \ No newline at end of file + # bring in utility functions + source .github/workflows/scripts/to_json.sh + + badge_label='JAX ${{ inputs.ARCHITECTURE }} build' + tags="${{ steps.final-metadata.outputs.tags }}" + digest="${{ steps.final-build.outputs.digest }}" + outcome="${{ steps.final-build.outcome }}" + + if [[ ${outcome} == "success" ]]; then + badge_message="pass" + badge_color=brightgreen + summary="JAX build on ${{ 
inputs.ARCHITECTURE }}: $badge_message" + else + badge_message="fail" + badge_color=red + summary="JAX build on ${{ inputs.ARCHITECTURE }}: $badge_message" + fi + + to_json \ + summary \ + badge_label tags digest outcome \ + > sitrep.json + + schemaVersion=1 \ + label="${badge_label}" \ + message="${badge_message}" \ + color="${badge_color}" \ + to_json schemaVersion label message color \ + > ${{ env.BADGE_FILENAME_FULL }} \ No newline at end of file diff --git a/.github/workflows/_sandbox.yaml b/.github/workflows/_sandbox.yaml index eaeae2bf6..67a444e88 100644 --- a/.github/workflows/_sandbox.yaml +++ b/.github/workflows/_sandbox.yaml @@ -32,6 +32,14 @@ jobs: TE_IMAGE: ${{ needs.build-pax.outputs.DOCKER_TAG_FINAL }} secrets: inherit + build-rosetta: + needs: [build-pax] + uses: ./.github/workflows/_build_rosetta.yaml + with: + ARCHITECTURE: amd64 + BASE_LIBRARY: pax + BASE_IMAGE: ${{ needs.build-pax.outputs.DOCKER_TAG_FINAL }} + # test-pax: # needs: [build-pax] # uses: ./.github/workflows/_test_pax.yaml diff --git a/.github/workflows/_test_pax_rosetta.yaml b/.github/workflows/_test_pax_rosetta.yaml index c69736a4c..14919e06a 100644 --- a/.github/workflows/_test_pax_rosetta.yaml +++ b/.github/workflows/_test_pax_rosetta.yaml @@ -1,4 +1,4 @@ -name: ~test Pax, MGMN +name: ~test Pax, multi-node on: workflow_call: @@ -20,7 +20,7 @@ on: jobs: - multi-gpu-multi-node-te: + rosetta-pax-multi-node-te: strategy: matrix: PARALLEL_CONFIG: @@ -157,7 +157,7 @@ jobs: name: ${{ steps.meta.outputs.JOB_NAME }} path: output/* - multi-gpu-multi-node: + rosetta-pax-multi-node: strategy: matrix: PARALLEL_CONFIG: @@ -292,7 +292,7 @@ jobs: path: output/* - multi-gpu-single-node-dropout-te: + rosetta-pax-single-node-dropout-te: strategy: matrix: PARALLEL_CONFIG: @@ -428,7 +428,7 @@ jobs: metrics: - needs: [multi-gpu-multi-node, multi-gpu-multi-node-te, multi-gpu-single-node-dropout-te] + needs: [rosetta-pax-multi-node, rosetta-pax-multi-node-te, rosetta-pax-single-node-dropout-te] runs-on: 
ubuntu-22.04 steps: @@ -466,7 +466,7 @@ jobs: publish-test: - needs: [multi-gpu-multi-node, multi-gpu-multi-node-te, multi-gpu-single-node-dropout-te, metrics] + needs: [rosetta-pax-multi-node, rosetta-pax-multi-node-te, rosetta-pax-single-node-dropout-te, metrics] uses: ./.github/workflows/_publish_badge.yaml if: ( always() ) secrets: inherit diff --git a/.github/workflows/nightly-rosetta-pax-build.yaml b/.github/workflows/nightly-rosetta-pax-build-test.yaml similarity index 54% rename from .github/workflows/nightly-rosetta-pax-build.yaml rename to .github/workflows/nightly-rosetta-pax-build-test.yaml index c12cfd8f0..e28dd0f2e 100644 --- a/.github/workflows/nightly-rosetta-pax-build.yaml +++ b/.github/workflows/nightly-rosetta-pax-build-test.yaml @@ -31,61 +31,108 @@ permissions: jobs: metadata: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' runs-on: ubuntu-22.04 outputs: - BUILD_DATE: ${{ steps.meta-vars.outputs.BUILD_DATE }} - BASE_LIBRARY: ${{ steps.meta-vars.outputs.BASE_LIBRARY }} - BASE_IMAGE: ${{ steps.meta-vars.outputs.BASE_IMAGE }} - PUBLISH: ${{ steps.meta-vars.outputs.PUBLISH }} + BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }} + BASE_LIBRARY: ${{ steps.base-metadata.outputs.BASE_LIBRARY }} + BASE_IMAGE: ${{ steps.base-metadata.outputs.BASE_IMAGE }} + PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }} steps: - - name: Set build metadata - id: meta-vars + - name: Check if the triggering workflow failed + id: if-upstream-failed + shell: bash -x -e {0} + run: | + echo "UPSTREAM_FAILED=${{ github.event_name == 'workflow_run' && github.event.workflow_run.conclusion != 'success' }}" >> $GITHUB_OUTPUT + + - name: Cancel workflow if upstream workflow did not succeed + if: ${{ steps.if-upstream-failed.outputs.UPSTREAM_FAILED == 'true' }} + uses: styfle/cancel-workflow-action@0.12.0 + + - name: Determine if the resulting container should be 'published' + id: if-publish + shell: bash -x -e {0} + run: + # A 
container should be published if: + # 1) the workflow is triggered by workflow_dispatch and the PUBLISH input is true, or + # 2) the workflow is triggered by workflow_run (i.e., a nightly build) + echo "PUBLISH=${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) }}" >> $GITHUB_OUTPUT + + - name: Set build date + id: date shell: bash -x -e {0} run: | BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d') + echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT + + - name: Set base library and image + id: base-metadata + shell: bash -x -e {0} + run: | if [[ -z "${{ inputs.BASE_IMAGE }}" ]]; then BASE_IMAGE=${{ env.DOCKER_REGISTRY }}/upstream-${{ env.BASE_LIBRARY }}:latest else BASE_IMAGE=${{ inputs.BASE_IMAGE }} fi - echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT + echo "BASE_LIBRARY=${{ env.BASE_LIBRARY }}" >> $GITHUB_OUTPUT echo "BASE_IMAGE=${BASE_IMAGE}" >> $GITHUB_OUTPUT - echo "PUBLISH=${{ inputs.PUBLISH }}" >> $GITHUB_OUTPUT - build: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' + amd64: needs: metadata uses: ./.github/workflows/_build_rosetta.yaml with: + ARCHITECTURE: amd64 BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} BASE_LIBRARY: ${{ needs.metadata.outputs.BASE_LIBRARY }} BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE }} secrets: inherit - publish-build: - needs: [metadata, build] - uses: ./.github/workflows/_publish_badge.yaml - if: ( always() ) + arm64: + needs: metadata + uses: ./.github/workflows/_build_rosetta.yaml + with: + ARCHITECTURE: arm64 + BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} + BASE_LIBRARY: ${{ needs.metadata.outputs.BASE_LIBRARY }} + BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE }} secrets: inherit + + publish-mealkit: + needs: [metadata, amd64, arm64] + if: needs.metadata.outputs.PUBLISH == 'true' + uses: ./.github/workflows/_publish_container.yaml + with: + SOURCE_IMAGE: | 
+ ${{ needs.amd64.outputs.DOCKER_TAG_MEALKIT }} + ${{ needs.arm64.outputs.DOCKER_TAG_MEALKIT }} + TARGET_IMAGE: upstream-pax + TARGET_TAGS: | + type=raw,value=mealkit,priority=500 + type=raw,value=mealkit-${{ needs.metadata.outputs.BUILD_DATE }},priority=500 + + publish-final: + needs: [metadata, amd64, arm64] + if: needs.metadata.outputs.PUBLISH == 'true' + uses: ./.github/workflows/_publish_container.yaml with: - ENDPOINT_FILENAME: 'rosetta-pax-build-status.json' - PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }} - SCRIPT: | - if [[ ${{ needs.build.result }} == "success" ]]; then - BADGE_COLOR=brightgreen - MSG=passing - else - BADGE_COLOR=red - MSG=failing - fi - echo "LABEL='nightly'" >> $GITHUB_OUTPUT - echo "MESSAGE='${MSG}'" >> $GITHUB_OUTPUT - echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT + SOURCE_IMAGE: | + ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} + ${{ needs.arm64.outputs.DOCKER_TAG_FINAL }} + TARGET_IMAGE: upstream-pax + TARGET_TAGS: | + type=raw,value=latest,priority=1000 + type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 + + finalize: + if: always() + needs: [metadata, amd64, arm64] + uses: ./.github/workflows/_finalize.yaml + with: + PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }} + secrets: inherit test-pax: - needs: build + needs: [metadata, amd64, arm64] uses: ./.github/workflows/_test_pax_rosetta.yaml if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' with: @@ -93,7 +140,7 @@ jobs: secrets: inherit publish-test: - needs: [metadata, build, test-pax] + needs: [metadata, amd64, arm64, test-pax] uses: ./.github/workflows/_publish_badge.yaml if: ( always() ) secrets: inherit @@ -119,32 +166,4 @@ jobs: fi echo "MESSAGE='${MESSAGE}'" >> $GITHUB_OUTPUT - echo "COLOR='${COLOR}'" >> $GITHUB_OUTPUT - - publish-latest-container: - needs: [metadata, build, test-pax] - if: ( ${{ 
needs.test-pax.outputs.TEST_STATUS == 'success' }} ) && ((github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH)) - uses: ./.github/workflows/_publish_container.yaml - secrets: inherit - with: - SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }} - TARGET_IMAGE: pax - TARGET_TAGS: | - type=raw,value=latest,priority=1000 - - publish-container: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) - needs: [metadata, build] - uses: ./.github/workflows/_publish_container.yaml - secrets: inherit - with: - SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }} - TARGET_IMAGE: pax - TARGET_TAGS: | - type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 - - if-upstream-failed: - runs-on: ubuntu-latest - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure') && github.event_name != 'workflow_dispatch' - steps: - - run: echo 'Upstream workflow failed, aborting run' && exit 1 + echo "COLOR='${COLOR}'" >> $GITHUB_OUTPUT \ No newline at end of file diff --git a/.github/workflows/nightly-rosetta-t5x-build-test.yaml b/.github/workflows/nightly-rosetta-t5x-build-test.yaml index 360f8f586..ed0c46efd 100644 --- a/.github/workflows/nightly-rosetta-t5x-build-test.yaml +++ b/.github/workflows/nightly-rosetta-t5x-build-test.yaml @@ -31,71 +31,109 @@ permissions: jobs: metadata: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' runs-on: ubuntu-22.04 outputs: - BUILD_DATE: ${{ steps.meta-vars.outputs.BUILD_DATE }} - BASE_LIBRARY: ${{ steps.meta-vars.outputs.BASE_LIBRARY }} - BASE_IMAGE: ${{ steps.meta-vars.outputs.BASE_IMAGE }} - PUBLISH: ${{ steps.meta-vars.outputs.PUBLISH }} + BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }} + 
BASE_LIBRARY: ${{ steps.base-metadata.outputs.BASE_LIBRARY }} + BASE_IMAGE: ${{ steps.base-metadata.outputs.BASE_IMAGE }} + PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }} steps: - - name: Set build metadata - id: meta-vars + - name: Check if the triggering workflow failed + id: if-upstream-failed + shell: bash -x -e {0} + run: | + echo "UPSTREAM_FAILED=${{ github.event_name == 'workflow_run' && github.event.workflow_run.conclusion != 'success' }}" >> $GITHUB_OUTPUT + + - name: Cancel workflow if upstream workflow did not succeed + if: ${{ steps.if-upstream-failed.outputs.UPSTREAM_FAILED == 'true' }} + uses: styfle/cancel-workflow-action@0.12.0 + + - name: Determine if the resulting container should be 'published' + id: if-publish + shell: bash -x -e {0} + run: + # A container should be published if: + # 1) the workflow is triggered by workflow_dispatch and the PUBLISH input is true, or + # 2) the workflow is triggered by workflow_run (i.e., a nightly build) + echo "PUBLISH=${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) }}" >> $GITHUB_OUTPUT + + - name: Set build date + id: date shell: bash -x -e {0} run: | BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d') + echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT + + - name: Set base library and image + id: base-metadata + shell: bash -x -e {0} + run: | if [[ -z "${{ inputs.BASE_IMAGE }}" ]]; then BASE_IMAGE=${{ env.DOCKER_REGISTRY }}/upstream-${{ env.BASE_LIBRARY }}:latest else BASE_IMAGE=${{ inputs.BASE_IMAGE }} fi - echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT + echo "BASE_LIBRARY=${{ env.BASE_LIBRARY }}" >> $GITHUB_OUTPUT echo "BASE_IMAGE=${BASE_IMAGE}" >> $GITHUB_OUTPUT - echo "PUBLISH=${{ inputs.PUBLISH }}" >> $GITHUB_OUTPUT - - build: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' + + amd64: needs: metadata uses: ./.github/workflows/_build_rosetta.yaml with: + ARCHITECTURE: amd64 BUILD_DATE: ${{ 
needs.metadata.outputs.BUILD_DATE }} BASE_LIBRARY: ${{ needs.metadata.outputs.BASE_LIBRARY }} BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE }} - # TODO: Can't build ARM until https://github.com/NVIDIA/JAX-Toolbox/pull/252 is available - PLATFORMS: '["amd64"]' secrets: inherit + + arm64: + needs: metadata + runs-on: ubuntu-22.04 + outputs: + DOCKER_TAG_MEALKIT: '' + steps: + - name: Generate placeholder warning + shell: bash -x -e {0} + run: | + echo "WARNING: arm64 build is not yet supported" + + publish-mealkit: + needs: [metadata, amd64, arm64] + if: needs.metadata.outputs.PUBLISH == 'true' + uses: ./.github/workflows/_publish_container.yaml + with: + SOURCE_IMAGE: | + ${{ needs.amd64.outputs.DOCKER_TAG_MEALKIT }} + ${{ needs.arm64.outputs.DOCKER_TAG_MEALKIT }} + TARGET_IMAGE: upstream-t5x + TARGET_TAGS: | + type=raw,value=mealkit,priority=500 + type=raw,value=mealkit-${{ needs.metadata.outputs.BUILD_DATE }},priority=500 - publish-build: - needs: [metadata, build] - uses: ./.github/workflows/_publish_badge.yaml - if: ( always() ) - secrets: inherit + publish-final: + needs: [metadata, amd64, arm64] + if: needs.metadata.outputs.PUBLISH == 'true' + uses: ./.github/workflows/_publish_container.yaml with: - ENDPOINT_FILENAME: 'rosetta-t5x-build-status.json' - PUBLISH: ${{ github.event_name == 'workflow_run' || needs.metadata.outputs.PUBLISH == 'true' }} - SCRIPT: | - if [[ ${{ needs.build.result }} == "success" ]]; then - BADGE_COLOR=brightgreen - MSG=passing - else - BADGE_COLOR=red - MSG=failing - fi - echo "LABEL='nightly'" >> $GITHUB_OUTPUT - echo "MESSAGE='${MSG}'" >> $GITHUB_OUTPUT - echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT + SOURCE_IMAGE: | + ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} + ${{ needs.arm64.outputs.DOCKER_TAG_FINAL }} + TARGET_IMAGE: upstream-t5x + TARGET_TAGS: | + type=raw,value=latest,priority=1000 + type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 test-unit: if: (github.event_name == 'workflow_run' && 
github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' - needs: build + needs: [metadata, amd64, arm64] uses: ./.github/workflows/_test_rosetta.yaml with: ROSETTA_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }} secrets: inherit test-t5x: - needs: build + needs: [metadata, amd64, arm64] uses: ./.github/workflows/_test_t5x_rosetta.yaml if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' with: @@ -103,7 +141,7 @@ jobs: secrets: inherit test-vit: - needs: build + needs: [metadata, amd64, arm64] uses: ./.github/workflows/_test_vit.yaml if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' with: @@ -120,7 +158,7 @@ jobs: secrets: inherit publish-test: - needs: [metadata, build, test-unit, test-t5x, test-vit] + needs: [metadata, test-unit, test-t5x, test-vit] uses: ./.github/workflows/_publish_badge.yaml if: ( always() ) secrets: inherit @@ -156,30 +194,11 @@ jobs: echo "MESSAGE='${MESSAGE}'" >> $GITHUB_OUTPUT echo "COLOR='${COLOR}'" >> $GITHUB_OUTPUT - publish-latest-container: - needs: [metadata, build, test-t5x, test-unit, test-vit] - if: ( needs.test-unit.outputs.TEST_STATUS == 'success' && needs.test-t5x.outputs.TEST_STATUS == 'success' && needs.test-vit.outputs.TEST_STATUS == 'success' ) && ((github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH)) - uses: ./.github/workflows/_publish_container.yaml - secrets: inherit + finalize: + if: always() + needs: [metadata, amd64, arm64] + uses: ./.github/workflows/_finalize.yaml with: - SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }} - TARGET_IMAGE: t5x - TARGET_TAGS: | - type=raw,value=latest,priority=1000 - - publish-container: - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 
'success') || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) - needs: [metadata, build] - uses: ./.github/workflows/_publish_container.yaml + PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }} secrets: inherit - with: - SOURCE_IMAGE: ${{ needs.build.outputs.DOCKER_TAGS }} - TARGET_IMAGE: t5x - TARGET_TAGS: | - type=raw,value=nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900 - if-upstream-failed: - runs-on: ubuntu-latest - if: (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'failure') && github.event_name != 'workflow_dispatch' - steps: - - run: echo 'Upstream workflow failed, aborting run' && exit 1 diff --git a/.github/workflows/nightly-t5x-build.yaml b/.github/workflows/nightly-t5x-build.yaml index 4c29136cf..8f0ad277f 100644 --- a/.github/workflows/nightly-t5x-build.yaml +++ b/.github/workflows/nightly-t5x-build.yaml @@ -65,7 +65,7 @@ jobs: needs: metadata runs-on: ubuntu-22.04 outputs: - DOCKER_TAG_FINAL: '' + DOCKER_TAG_MEALKIT: '' steps: - name: Generate placeholder warning shell: bash -x -e {0} diff --git a/.github/workflows/nightly-te-test.yaml b/.github/workflows/nightly-te-test.yaml index 64644294f..d95de68c1 100644 --- a/.github/workflows/nightly-te-test.yaml +++ b/.github/workflows/nightly-te-test.yaml @@ -3,7 +3,7 @@ run-name: Nightly Transformer Engine test (${{ github.event_name == 'workflow_ru on: workflow_run: - workflows: [Nightly JAX build] + workflows: [Nightly Pax build] types: [completed] branches: [main] workflow_dispatch: @@ -12,7 +12,7 @@ on: type: string description: 'JAX-TE image build by NVIDIA/JAX-Toolbox' required: true - default: 'ghcr.io/nvidia/jax:latest' + default: 'ghcr.io/nvidia/upstream-pax:latest' PUBLISH: type: boolean description: Update status badge? 
@@ -25,7 +25,7 @@ permissions: packages: write # to upload container env: - DEFAULT_JAX_TE_IMAGE: 'ghcr.io/nvidia/jax:latest' + DEFAULT_JAX_TE_IMAGE: 'ghcr.io/nvidia/upstream-pax:latest' jobs: diff --git a/rosetta/Dockerfile.pax b/rosetta/Dockerfile.pax index 8250827e3..503be8dcb 100644 --- a/rosetta/Dockerfile.pax +++ b/rosetta/Dockerfile.pax @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1-labs -ARG BASE_IMAGE=ghcr.io/nvidia/upstream-pax:latest +ARG BASE_IMAGE=ghcr.io/nvidia/upstream-pax:mealkit ARG GIT_USER_EMAIL=jax@nvidia.com ARG GIT_USER_NAME=NVIDIA diff --git a/rosetta/Dockerfile.t5x b/rosetta/Dockerfile.t5x index 3878ff5c0..b25223fab 100644 --- a/rosetta/Dockerfile.t5x +++ b/rosetta/Dockerfile.t5x @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1-labs -ARG BASE_IMAGE=ghcr.io/nvidia/upstream-t5x:latest +ARG BASE_IMAGE=ghcr.io/nvidia/upstream-t5x:mealkit ARG GIT_USER_EMAIL=jax@nvidia.com ARG GIT_USER_NAME=NVIDIA