Skip to content

Commit

Permalink
[CI] add flaky reports
Browse files Browse the repository at this point in the history
Signed-off-by: sule <sule.sl@antgroup.com>
  • Loading branch information
xsuler committed Feb 25, 2025
1 parent 1162909 commit eb96da5
Show file tree
Hide file tree
Showing 5 changed files with 980 additions and 16 deletions.
77 changes: 77 additions & 0 deletions .github/workflows/flaky-report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
name: Flaky Test Report

# Triggers: a fixed daily schedule, plus on-demand runs with tunable fetch limits.
on:
  schedule:
    # Run daily at midnight UTC
    - cron: '0 0 * * *'
  # Allow manual triggering
  workflow_dispatch:
    inputs:
      # Both inputs are strings; the job reads them and falls back to the same
      # defaults when the run was started by the schedule (no inputs present).
      num_artifacts:
        description: 'Number of artifacts to fetch (default: 20)'
        type: string
        required: false
        default: '20'
      max_runs_to_check:
        description: 'Maximum number of workflow runs to check (default: 50)'
        type: string
        required: false
        default: '50'

jobs:
  # Fetches recent CI artifacts, builds the flaky-test HTML report from them,
  # uploads the report as a workflow artifact, and links it in the run summary.
  generate-report:
    runs-on: ubuntu-latest
    # The job token is limited to read-only scopes.
    permissions:
      contents: read
      actions: read

    steps:
      - name: Checkout repository
        # v4 runs on a supported Node runtime (v3 targets the deprecated Node 16).
        uses: actions/checkout@v4

      - name: Set up Python
        # v5 runs on a supported Node runtime (v4 targets the deprecated Node 16).
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not parse the version as the float 3.1.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Fetch artifacts
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Scheduled runs carry no inputs, so fall back to the documented defaults.
          NUM_ARTIFACTS: ${{ github.event.inputs.num_artifacts || '20' }}
          MAX_RUNS: ${{ github.event.inputs.max_runs_to_check || '50' }}
        run: |
          # Inputs are user-supplied on manual runs: pass them through env vars and
          # quote the expansions so they cannot be word-split or glob-expanded.
          python ci/ray_ci/fetch_artifacts.py --num-artifacts "$NUM_ARTIFACTS" --max-runs "$MAX_RUNS"
          echo "Artifacts fetched successfully"
          # Fails the step if the script did not create the artifacts/ directory.
          ls -la artifacts/

      - name: Generate flaky test report
        run: |
          python ci/ray_ci/flaky_report.py
          echo "Report generated successfully"
          # Fails the step if flaky_report.html was not produced.
          ls -la flaky_report.html

      - name: Upload report artifact
        # The id lets the summary step read this step's artifact-id output.
        id: artifact-upload-step
        uses: actions/upload-artifact@v4
        with:
          name: flaky-test-report
          path: flaky_report.html
          retention-days: 90

      - name: Create Summary
        run: |
          # One grouped, quoted redirect instead of appending line by line.
          {
            echo "### 📊 Test Reports Available"
            echo ""
            echo "View detailed test results at:"
            echo "🔗 [Ant Ray Dashboard](https://ant-ray.streamlit.app?run_id=${{ github.run_id }}&artifact_id=${{ steps.artifact-upload-step.outputs.artifact-id }})"
            echo ""
            echo "This link contains test reports and analysis for commit \`${{ github.sha }}\`"
            echo ""
            echo "### 📈 Report Preview"
            echo "Download the [Flaky Test Report](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})"
          } >> "$GITHUB_STEP_SUMMARY"
27 changes: 27 additions & 0 deletions .github/workflows/ray-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ jobs:
else
echo "exists=false" >> $GITHUB_OUTPUT
fi
- name: Build core CI Base
if: steps.check_core_image.outputs.exists != 'true'
uses: docker/build-push-action@v3
Expand All @@ -250,6 +251,7 @@ jobs:
elif ! docker ps --format '{{.Names}}' | grep -q '^local-registry$'; then
docker start local-registry
fi
- name: Push core build image to local registry
if: '!cancelled()'
run: |
Expand All @@ -272,6 +274,7 @@ jobs:
if: '!cancelled()'
run: |
bash ci/env/install-bazel.sh
- name: Check Manylinux Image Existence
id: check_manylinux_image
run: |
Expand All @@ -280,6 +283,7 @@ jobs:
else
echo "exists=false" >> $GITHUB_OUTPUT
fi
- name: Build Manylinux Image
if: steps.check_manylinux_image.outputs.exists != 'true'
uses: docker/build-push-action@v3
Expand All @@ -292,6 +296,7 @@ jobs:
build-args: |
BUILDKITE_BAZEL_CACHE_URL
HOSTTYPE=x86_64
- name: Run Core Python Tests (1)
if: '!cancelled()'
env:
Expand All @@ -302,16 +307,19 @@ jobs:
--workers 1 --worker-id 0 --parallelism-per-worker 12 \
--except-tags debug_tests,asan_tests,post_wheel_build,ha_integration,mem_pressure,tmpfs,container,manual,use_all_core,multi_gpu,large_size_python_tests_shard_2 \
--test-env=BAZEL_DIR=$(realpath ${{ env.BAZEL_DIR }}) \
- name: Grant Permissions
if: '!cancelled()'
run: |
sudo chmod -R 777 ${{ env.BAZEL_DIR }}
- name: Generate Report For Label
if: '!cancelled()'
env:
REPORT_LABEL: ${{ env.REPORT_LABEL_1 }}
run: |
python ci/ray_ci/report_gen.py
- name: Run Core Python Tests (2)
if: '!cancelled()'
env:
Expand All @@ -324,16 +332,19 @@ jobs:
--except-tags debug_tests,asan_tests,post_wheel_build,ha_integration,mem_pressure,tmpfs,container,manual,use_all_core,multi_gpu,large_size_python_tests_shard_2 \
--test-env=BAZEL_DIR=$(realpath ${{ env.BAZEL_DIR }}) \
--skip-ray-installation
- name: Grant Permissions
if: '!cancelled()'
run: |
sudo chmod -R 777 ${{ env.BAZEL_DIR }}
- name: Generate Report For Label
if: '!cancelled()'
env:
REPORT_LABEL: ${{ env.REPORT_LABEL_2 }}
run: |
python ci/ray_ci/report_gen.py
- name: Run Core Python Tests (3)
if: '!cancelled()'
env:
Expand All @@ -345,16 +356,19 @@ jobs:
--workers 1 --worker-id 0 --parallelism-per-worker 12 --skip-ray-installation \
--only-tags use_all_core \
--test-env=BAZEL_DIR=$(realpath ${{ env.BAZEL_DIR }})
- name: Grant Permissions
if: '!cancelled()'
run: |
sudo chmod -R 777 ${{ env.BAZEL_DIR }}
- name: Generate Report For Label
if: '!cancelled()'
env:
REPORT_LABEL: ${{ env.REPORT_LABEL_3 }}
run: |
python ci/ray_ci/report_gen.py
- name: Update Test Image
if: '!cancelled()'
run: |
Expand All @@ -371,10 +385,12 @@ jobs:
bazel run //ci/ray_ci:test_in_docker -- //:all //src/... core --build-type clang \
--workers 1 --worker-id 0 --parallelism-per-worker 12 --skip-ray-installation \
--test-env=BAZEL_DIR=$(realpath ${{ env.BAZEL_DIR }})
- name: Grant Permissions
if: '!cancelled()'
run: |
sudo chmod -R 777 ${{ env.BAZEL_DIR }}
- name: Generate Report For Label
if: '!cancelled()'
env:
Expand All @@ -391,16 +407,19 @@ jobs:
bazel run //ci/ray_ci:test_in_docker -- python/ray/dashboard/... core \
--parallelism-per-worker 12 --skip-ray-installation \
--test-env=BAZEL_DIR=$(realpath ${{ env.BAZEL_DIR }})
- name: Grant Permissions
if: '!cancelled()'
run: |
sudo chmod -R 777 ${{ env.BAZEL_DIR }}
- name: Generate Report For Label
if: '!cancelled()'
env:
REPORT_LABEL: ${{ env.REPORT_LABEL_5 }}
run: |
python ci/ray_ci/report_gen.py
- name: Run Workflow Tests
if: '!cancelled()'
env:
Expand All @@ -424,12 +443,14 @@ jobs:
if: '!cancelled()'
run: |
sudo chmod -R 777 ${{ env.BAZEL_DIR }}
- name: Generate Report For Label
if: '!cancelled()'
env:
REPORT_LABEL: ${{ env.REPORT_LABEL_6 }}
run: |
python ci/ray_ci/report_gen.py
- name: Run Debug Tests
if: '!cancelled()'
env:
Expand All @@ -444,16 +465,19 @@ jobs:
--except-tags kubernetes,manual \
--test-env=BAZEL_DIR=$(realpath ${{ env.BAZEL_DIR }}) \
--skip-ray-installation
- name: Grant Permissions
if: '!cancelled()'
run: |
sudo chmod -R 777 ${{ env.BAZEL_DIR }}
- name: Generate Report For Label
if: '!cancelled()'
env:
REPORT_LABEL: ${{ env.REPORT_LABEL_7 }}
run: |
python ci/ray_ci/report_gen.py
- name: Run ASAN Tests
if: '!cancelled()'
env:
Expand All @@ -468,6 +492,7 @@ jobs:
--except-tags kubernetes,manual \
--skip-ray-installation \
--test-env=BAZEL_DIR=$(realpath ${{ env.BAZEL_DIR }})
- name: Grant Permissions
if: '!cancelled()'
run: |
Expand All @@ -494,6 +519,7 @@ jobs:
docker ps -a --filter ancestor=localhost:5000/citemp:${{ env.RAYCI_BUILD_ID }}-corebuild -q | xargs -r docker rm --force || true
# Clean up any dangling containers
docker ps -a --filter status=exited -q | xargs -r docker rm || true
- name: Upload Test Reports
id: artifact-upload-step
continue-on-error: true
Expand All @@ -502,6 +528,7 @@ jobs:
with:
name: test-reports-${{ github.sha }}
path: ${{ env.artifacts_path }}/reports/
retention-days: 90

- name: Create Summary
if: '!cancelled()'
Expand Down
Loading

0 comments on commit eb96da5

Please sign in to comment.