Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results #12
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, trigger, and run de-duplication.
name: 'QE: WASM benchmarks'

on:
  pull_request:
    # Paths excluded from triggering this workflow.
    # NOTE(review): the `!` entry is presumably meant to re-include this
    # workflow file despite the `.github/**` exclusion; confirm that negated
    # patterns are honoured under `paths-ignore`.
    paths-ignore:
      - '.github/**'
      - '!.github/workflows/wasm-benchmarks.yml'
      - '.buildkite/**'
      - '*.md'
      - 'LICENSE'
      - 'CODEOWNERS'

# One concurrency group per workflow and ref; a newer run cancels the one
# still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Sets up the toolchain, runs the benchmark suite, posts (or refreshes) a
  # report comment on the pull request, and finally fails the run when the
  # benchmark step reported `status=failed`.
  benchmarks:
    runs-on: ubuntu-latest
    # Set environment variables for the whole job
    env:
      PROFILE: release
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@v4

      # No `node-version` input is passed: this job declares no matrix, so the
      # former `matrix.node_version` expression always expanded to an empty
      # string. Without the input, setup-node uses the Node.js found on PATH.
      - name: "Setup Node.js"
        uses: actions/setup-node@v4

      - name: "Setup pnpm"
        uses: pnpm/action-setup@v2
        with:
          version: 8

      # Best-effort login: skipped when either secret is empty, and a failed
      # login does not fail the job (`continue-on-error`). The secrets are
      # mirrored into step-level `env` so the `if` expression can test them.
      - name: "Login to Docker Hub"
        uses: docker/login-action@v3
        continue-on-error: true
        env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        if: "${{ env.DOCKERHUB_USERNAME != '' && env.DOCKERHUB_TOKEN != '' }}"
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - uses: cachix/install-nix-action@v24

      - name: Setup benchmark
        run: make setup-pg-bench

      # Step outputs:
      #   bench_output - raw stdout of `make run-bench`
      #   summary      - one-line headline for the report comment
      #   status       - `passed` or `failed`; read by the last step of the job
      # NOTE(review): the leading "β" / "π" glyphs in the summaries look like
      # mis-encoded emoji; they are kept verbatim - confirm the intended
      # characters.
      - name: Run benchmarks
        id: bench
        run: |
          # Enable pipefail only around the benchmark so that a failing `make`
          # is not masked by `tee`. The exit code is captured instead of
          # aborting, so the outcome can still be reported on the pull request.
          set -o pipefail
          bench_exit=0
          make run-bench | tee results.txt || bench_exit=$?
          set +o pipefail

          # Expose the raw output as the multi-line step output `bench_output`.
          {
            echo 'bench_output<<EOF'
            cat results.txt
            # Terminate a partial last line so the delimiter is on its own line.
            if [ -n "$(tail -c 1 results.txt)" ]; then echo; fi
            echo EOF
          } >> "$GITHUB_OUTPUT"

          # The benchmark itself failed: report it and let the final step of
          # the job fail the workflow after the comment has been posted.
          if [ "$bench_exit" -ne 0 ]; then
            echo "summary=WASM query-engine: the benchmark script failed (exit code $bench_exit)" >> "$GITHUB_OUTPUT"
            echo "status=failed" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Ratios printed before the "x" on each comparison line. Regressions
          # are only counted when they exceed 2% (ratio above 1.02).
          regressed_values=$(grep "slower than Web Assembly: Latest" results.txt | cut -f1 -d'x' | awk '$1 > 1.02')
          improved_values=$(grep "faster than Web Assembly: Latest" results.txt | cut -f1 -d'x')

          # Initialize sum variable and count
          total_sum=0
          total_count=0

          # Add the inverted regressed values to the sum, so that a slowdown
          # of Nx contributes 1/N (a ratio below 1).
          for value in $regressed_values; do
            inverted=$(echo "scale=4; 1/$value" | bc)
            total_sum=$(echo "$total_sum + $inverted" | bc)
            # Not `((total_count++))`: that returns a failure status when the
            # old value is 0, which aborts a script running under `bash -e`.
            total_count=$((total_count + 1))
          done

          # Add the improved values to the sum
          for value in $improved_values; do
            total_sum=$(echo "$total_sum + $value" | bc)
            total_count=$((total_count + 1))
          done

          if [ "$total_count" -eq 0 ]; then
            echo "summary=β WASM query-engine: no benchmarks have changed substantially" >> "$GITHUB_OUTPUT"
            echo "status=passed" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Mean speed ratio: above 1 is faster than the baseline, below 1 is
          # slower.
          mean=$(echo "scale=4; $total_sum / $total_count" | bc)

          # Calculate the percentage of improvement or worsening
          if (( $(echo "$mean > 1.02" | bc -l) )); then
            change_percentage=$(echo "scale=4; $mean - 1" | bc)
            summary="π The benchmark suite has improved by $(echo "$change_percentage * 100" | bc) percent."
            status=passed
          elif (( $(echo "$mean < 0.98" | bc -l) )); then
            # (1 / mean) - 1 is positive here because mean is below 1.
            change_percentage=$(echo "scale=4; (1 / $mean) - 1" | bc)
            summary="β The benchmark suite has worsened by $(echo "$change_percentage * 100" | bc) percent."
            status=failed
          else
            summary="β WASM query-engine: no benchmarks have changed substantially"
            status=passed
          fi

          echo "summary=$summary" >> "$GITHUB_OUTPUT"
          echo "status=$status" >> "$GITHUB_OUTPUT"

      # Look up an earlier report by its hidden HTML marker so that it can be
      # replaced rather than duplicated.
      - name: Find past report comment
        uses: peter-evans/find-comment@v2
        id: findReportComment
        with:
          issue-number: ${{ github.event.pull_request.number }}
          body-includes: "<!-- wasm-engine-perf -->"

      # The blank lines in the body are deliberate: Markdown inside
      # `<details>` (the fenced block) is only rendered when it is separated
      # from the surrounding HTML tags by a blank line.
      - name: Create or update report
        uses: peter-evans/create-or-update-comment@v3
        with:
          comment-id: ${{ steps.findReportComment.outputs.comment-id }}
          issue-number: ${{ github.event.pull_request.number }}
          body: |
            <!-- wasm-engine-perf -->
            #### ${{ steps.bench.outputs.summary }}

            <details>
            <summary>Full benchmark report</summary>

            ```
            ${{ steps.bench.outputs.bench_output }}
            ```

            </details>

            After changes in ${{ github.event.pull_request.head.sha }}
          edit-mode: replace

      # Runs after the report has been posted, so a failing status still
      # leaves a comment on the pull request.
      - name: Fail workflow if regression detected
        if: steps.bench.outputs.status == 'failed'
        run: |
          echo "Workflow failed due to benchmark regression."
          exit 1