diff --git a/.github/workflows/userbenchmark-ibmcloud-testrunner.yml b/.github/workflows/userbenchmark-ibmcloud-testrunner.yml
new file mode 100644
index 0000000000..007ad09a50
--- /dev/null
+++ b/.github/workflows/userbenchmark-ibmcloud-testrunner.yml
@@ -0,0 +1,96 @@
+name: TorchBench Userbenchmark test on IBM Cloud
+on:
+  workflow_dispatch:
+    inputs:
+      userbenchmark_name:
+        description: "Name of the user benchmark to run"
+      userbenchmark_options:
+        description: "Options to pass to the user benchmark"
+env:
+  PLATFORM_NAME: "ibm_cloud"
+  TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  CONDA_ENV: "torchbench"
+  DOCKER_IMAGE: "ghcr.io/pytorch/torchbench:latest"
+  SETUP_SCRIPT: "/workspace/setup_instance.sh"
+jobs:
+  run-userbenchmark:
+    runs-on: [self-hosted, testrunner1545]
+    timeout-minutes: 1440 # 24 hours
+    steps:
+      - name: Checkout TorchBench
+        uses: actions/checkout@v3
+        with:
+          path: benchmark
+      - name: Install Conda
+        run: |
+          cd benchmark
+          bash scripts/install_conda.sh
+      - name: Pull docker image
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        with:
+          docker-image: ${{ env.DOCKER_IMAGE }}
+      - name: Run Docker Image
+        run: |
+          CONTAINER_ID=$(docker run \
+            -e CONDA_ENV \
+            -e SETUP_SCRIPT \
+            --tty \
+            --detach \
+            -v "${PWD}/benchmark:/benchmark" \
+            -w / \
+            "${{ env.DOCKER_IMAGE }}" \
+            tail -f /dev/null
+          )
+          echo "Container ID: ${CONTAINER_ID}"
+          # Write the CONTAINER_ID to GITHUB_ENV so later steps can exec into it
+          echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"
+      - name: Install TorchBench
+        run: |
+          docker exec -t -w "/benchmark" "${CONTAINER_ID}" bash /benchmark/scripts/torchbench_install.sh
+      - name: Run user benchmark
+        run: |
+          # Remove old results
+          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
+          pushd benchmark
+          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
+          # Run userbenchmark
+          MANUAL_WORKFLOW="${{ github.event.inputs.userbenchmark_name }}"
+          if [ -z "${MANUAL_WORKFLOW}" ]; then
+            # Figure out which userbenchmarks to run on this platform, and run them
+            docker exec -t -w "/benchmark" "${CONTAINER_ID}" bash -c ". ${SETUP_SCRIPT} && conda activate ${CONDA_ENV} && \
+              python .github/scripts/userbenchmark/schedule-benchmarks.py --platform ${PLATFORM_NAME}"
+            if [ -d ./.userbenchmark ]; then
+              cp -r ./.userbenchmark ../benchmark-output
+            else
+              mkdir ../benchmark-output
+            fi
+          else
+            docker exec -t -w "/benchmark" "${CONTAINER_ID}" bash -c ". ${SETUP_SCRIPT} && conda activate ${CONDA_ENV} && \
+              python run_benchmark.py \
+              \"${{ github.event.inputs.userbenchmark_name }}\" ${{ github.event.inputs.userbenchmark_options }}"
+            cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" ../benchmark-output
+          fi
+      - name: Upload result JSONs to Scribe
+        run: |
+          pushd benchmark
+          . scripts/activate_conda.sh
+          RESULTS=($(find "${PWD}/../benchmark-output" -maxdepth 2 -name "metrics-*.json" | sort -r))
+          echo "Uploading result JSONs: ${RESULTS[@]}"
+          for r in "${RESULTS[@]}"; do
+            python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
+            python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
+          done
+      - name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: TorchBench result
+          path: benchmark-output/
+      - name: Teardown Linux
+        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        if: always()
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
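
Usage note (not part of the diff): because the job only has a workflow_dispatch trigger, it is started manually. A minimal sketch of triggering it with the GitHub CLI, assuming gh is authenticated against the repository; the benchmark name "my_benchmark" and the "--device cuda" option are hypothetical placeholders, not values defined by this workflow. Leaving both inputs empty takes the first branch of the "Run user benchmark" step, where schedule-benchmarks.py picks the benchmarks for the ibm_cloud platform.

    # Run one specific userbenchmark with options
    gh workflow run userbenchmark-ibmcloud-testrunner.yml \
      -f userbenchmark_name=my_benchmark \
      -f userbenchmark_options="--device cuda"

    # Omit the inputs to let schedule-benchmarks.py choose what to run
    gh workflow run userbenchmark-ibmcloud-testrunner.yml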