Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results #12

Summary
Jobs
- benchmarks
Run details
- Usage
- Workflow file

Workflow file for this run

.github/workflows/wasm-benchmarks.yml at 6dcc071

	# Workflow identity and triggers for the WASM query-engine benchmarks.
	name: 'QE: WASM benchmarks'

	on:
	  pull_request:
	    # Pull requests that only touch the paths below do not trigger this workflow.
	    # The negated entry is presumably there so that edits to this workflow file
	    # itself still trigger a run - TODO confirm.
	    paths-ignore:
	      - '.github/**'
	      - '!.github/workflows/wasm-benchmarks.yml'
	      - '.buildkite/**'
	      - '*.md'
	      - 'LICENSE'
	      - 'CODEOWNERS'

	# One concurrency group per workflow + ref; a newer run cancels the one in progress.
	concurrency:
	  group: '${{ github.workflow }}-${{ github.ref }}'
	  cancel-in-progress: true

	jobs:
	  # Single job: builds the benchmark setup, runs it and reports on the PR.
	  benchmarks:
	    runs-on: 'ubuntu-latest'
	    # Environment variables shared by every step of the job.
	    env:
	      PROFILE: 'release'
	    steps:
	- name: Checkout PR branch
	uses: actions/checkout@v4

	- name: "Setup Node.js"
	uses: actions/setup-node@v4
	with:
	node-version: ${{ matrix.node_version }}

	- name: "Setup pnpm"
	uses: pnpm/action-setup@v2
	with:
	version: 8

	- name: "Login to Docker Hub"
	uses: docker/login-action@v3
	continue-on-error: true
	env:
	DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
	DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
	if: "${{ env.DOCKERHUB_USERNAME != '' && env.DOCKERHUB_TOKEN != '' }}"
	with:
	username: ${{ secrets.DOCKERHUB_USERNAME }}
	password: ${{ secrets.DOCKERHUB_TOKEN }}

	- uses: cachix/install-nix-action@v24

	- name: Setup benchmark
	run: make setup-pg-bench

	- name: Run benchmarks
	id: bench
	run: \|
	make run-bench \| tee results.txt

	# Save the output to a file so we can use it in the comment
	{
	echo 'bench_output<<EOF'
	cat results.txt
	echo EOF
	} >> "$GITHUB_OUTPUT"

	regressed_values=$(grep "slower than Web Assembly: Latest" results.txt \| cut -f1 -d'x' \| awk '$1 > 1.02' \| wc -l )
	improved_values=$(grep "faster than Web Assembly: Latest" results.txt \| cut -f1 -d'x')

	# Initialize sum variable and count
	total_sum=0
	total_count=0

	# Add the inverted regressed values to the sum
	for value in $regressed_values; do
	inverted=$(echo "scale=4; 1/$value" \| bc)
	total_sum=$(echo "$total_sum + $inverted" \| bc)
	((total_count++))
	done

	# Add the improved values to the sum
	for value in $improved_values; do
	total_sum=$(echo "$total_sum + $value" \| bc)
	((total_count++))
	done

	if [ $total_count -eq 0 ]; then
	echo "summary=✅ WASM query-engine: no benchmarks have changed substantially" >> "$GITHUB_OUTPUT"
	echo "status=passed" >> "$GITHUB_OUTPUT"
	exit 0
	fi

	# Calculate the percentage of improvement or worsening
	if (( $(echo "$mean > 1.02" \| bc -l) )); then
	change_percentage=$(echo "scale=4; $mean - 1" \| bc)
	summary="🚀 The benchmark suite has improved by $(echo "$change_percentage * 100" \| bc) percent."
	status=passed
	elif (( $(echo "$mean < 0.98" \| bc -l) )); then
	change_percentage=$(echo "scale=4; 1 - (1 / $mean)" \| bc)
	summary="❌ The benchmark suite has worsened by $(echo "$change_percentage * 100" \| bc) percent."
	status=failed
	else
	summary="✅ WASM query-engine: no benchmarks have changed substantially"
	status=passed
	fi

	echo "summary=$summary" >> "$GITHUB_OUTPUT"
	echo "status=$status" >> "$GITHUB_OUTPUT"

	- name: Find past report comment
	uses: peter-evans/find-comment@v2
	id: findReportComment
	with:
	issue-number: ${{ github.event.pull_request.number }}
	body-includes: "<!-- wasm-engine-perf -->"

	- name: Create or update report
	uses: peter-evans/create-or-update-comment@v3
	with:
	comment-id: ${{ steps.findReportComment.outputs.comment-id }}
	issue-number: ${{ github.event.pull_request.number }}
	body: \|
	<!-- wasm-engine-perf -->
	#### ${{ steps.bench.outputs.summary }}

	<details>
	<summary>Full benchmark report</summary>

	```
	${{ steps.bench.outputs.bench_output }}
	```
	</details>

	After changes in ${{ github.event.pull_request.head.sha }}
	edit-mode: replace

	- name: Fail workflow if regression detected
	if: steps.bench.outputs.status == 'failed'
	run: \|
	echo "Workflow failed due to benchmark regression."
	exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results #12

Workflow file

Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results #12

Jobs

Run details

Workflow file for this run