Skip to content

Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results #15

Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results

Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results #15

Workflow file for this run

name: "QE: WASM benchmarks"

# Run on pull requests, skipping PRs that only touch the paths listed below.
on:
  pull_request:
    paths-ignore:
      - ".github/**"
      # Negated pattern: intended to keep edits to this workflow file itself
      # from being ignored along with the rest of `.github/`.
      - "!.github/workflows/wasm-benchmarks.yml"
      - ".buildkite/**"
      - "*.md"
      - "LICENSE"
      - "CODEOWNERS"

# One concurrency group per workflow + ref: a newer run for the same ref
# cancels the run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Runs the WASM query-engine benchmarks for a pull request, posts (or
  # refreshes) a single report comment on the PR, and fails the run when the
  # benchmarks regressed or could not be run at all.
  benchmarks:
    runs-on: ubuntu-latest
    # Set environment variables for the whole job
    env:
      PROFILE: release
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@v4

      - name: "Setup Node.js"
        uses: actions/setup-node@v4
        with:
          # NOTE(review): this job defines no `strategy.matrix`, so this
          # expression expands to an empty string. Confirm which Node.js
          # version is intended and either pin it here or add a matrix.
          node-version: ${{ matrix.node_version }}

      # `bc` does the floating-point arithmetic in the "Run benchmarks" step.
      - name: Install bc
        run: sudo apt-get install -y bc

      - name: "Setup pnpm"
        uses: pnpm/action-setup@v2
        with:
          version: 8

      # Best effort: skipped when the credentials are not available, and a
      # failed login does not fail the job.
      - name: "Login to Docker Hub"
        if: "${{ env.DOCKERHUB_USERNAME != '' && env.DOCKERHUB_TOKEN != '' }}"
        uses: docker/login-action@v3
        continue-on-error: true
        env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - uses: cachix/install-nix-action@v24

      - name: Setup benchmark
        run: make setup-pg-bench

      # Outputs consumed by the later steps:
      #   bench_output - full benchmark log
      #   summary      - one-line headline for the PR comment
      #   status       - "passed" or "failed"
      - name: Run benchmarks
        id: bench
        run: |
          # The default `run` shell is `bash -e` without `pipefail`, so a
          # failing `make run-bench` would be masked by the exit status of
          # `tee`. Enable pipefail and capture the status explicitly so the
          # failure can still be reported in the PR comment.
          set -o pipefail
          bench_exit_code=0
          make run-bench | tee results.txt || bench_exit_code=$?

          # Save the output to a file so we can use it in the comment
          {
            echo 'bench_output<<EOF'
            cat results.txt
            echo EOF
          } >> "$GITHUB_OUTPUT"

          # A crashed benchmark must not be reported as "nothing changed".
          if [ "$bench_exit_code" -ne 0 ]; then
            echo "summary=❌ WASM query-engine benchmarks failed to run (exit code $bench_exit_code)" >> "$GITHUB_OUTPUT"
            echo "status=failed" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Extract the "<N>x slower/faster" ratios. `|| true` keeps a
          # no-match `grep` from aborting the script under `-e` + pipefail.
          regressed_values=$(grep "slower than Web Assembly: Latest" results.txt | cut -f1 -d'x' || true)
          improved_values=$(grep "faster than Web Assembly: Latest" results.txt | cut -f1 -d'x' || true)

          # Initialize sum variable and count
          total_sum=0
          total_count=0

          # Add the inverted regressed values to the sum
          for value in $regressed_values; do
            inverted=$(echo "scale=4; 1/$value" | bc)
            echo "Regressed value: $inverted"
            total_sum=$(echo "$total_sum + $inverted" | bc)
            # Not `((total_count++))`: that returns status 1 when the counter
            # is 0, which aborts the script under `bash -e`.
            total_count=$((total_count + 1))
          done

          # Add the improved values to the sum
          for value in $improved_values; do
            echo "Improved value: $value"
            total_sum=$(echo "$total_sum + $value" | bc)
            total_count=$((total_count + 1))
          done

          if [ "$total_count" -eq 0 ]; then
            echo "summary=✅ WASM query-engine: no benchmarks have changed substantially" >> "$GITHUB_OUTPUT"
            echo "status=passed" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          mean=$(echo "scale=4; $total_sum / $total_count" | bc)

          echo "Extracted $total_count values from the benchmark output"
          echo "Total sum: $total_sum"
          echo "Total count: $total_count"
          echo "Mean: $mean"

          # Calculate the percentage of improvement or worsening.
          # mean > 1 means faster on average, mean < 1 means slower; changes
          # within +/-1% are treated as noise.
          if (( $(echo "$mean > 1.01" | bc -l) )); then
            change_percentage=$(echo "scale=4; $mean - 1" | bc)
            summary="🚀 WASM query-engine performance will improve by $(echo "$change_percentage * 100" | bc) percent"
            status=passed
          elif (( $(echo "$mean < 0.99" | bc -l) )); then
            change_percentage=$(echo "scale=4; (1 / $mean) - 1" | bc)
            summary="❌ WASM query-engine performance will worsen by $(echo "$change_percentage * 100" | bc) percent"
            status=failed
          else
            change_percentage=$(echo "scale=4; (1 / $mean)" | bc)
            summary="✅ WASM query-engine performance won't change substantially. AVG(latency) = ${change_percentage}x"
            status=passed
          fi

          echo "summary=$summary" >> "$GITHUB_OUTPUT"
          echo "status=$status" >> "$GITHUB_OUTPUT"

      # Look up a previous report by its hidden HTML marker so it can be
      # updated in place rather than posting a new comment on every push.
      - name: Find past report comment
        uses: peter-evans/find-comment@v2
        id: findReportComment
        with:
          issue-number: ${{ github.event.pull_request.number }}
          body-includes: "<!-- wasm-engine-perf -->"

      # The blank line after </summary> is required: without it the code
      # fence is swallowed by the surrounding HTML block and not rendered.
      - name: Create or update report
        uses: peter-evans/create-or-update-comment@v3
        with:
          comment-id: ${{ steps.findReportComment.outputs.comment-id }}
          issue-number: ${{ github.event.pull_request.number }}
          body: |
            <!-- wasm-engine-perf -->
            #### ${{ steps.bench.outputs.summary }}

            <details>
            <summary>Full benchmark report</summary>

            ```
            ${{ steps.bench.outputs.bench_output }}
            ```
            </details>

            After changes in ${{ github.event.pull_request.head.sha }}
          edit-mode: replace

      # Runs after the report is posted so the PR comment exists even when
      # the check ends up red.
      - name: Fail workflow if regression detected
        if: steps.bench.outputs.status == 'failed'
        run: |
          echo "Workflow failed: benchmarks regressed or could not be run."
          exit 1