Bench #449
name: Bench
on:
  workflow_call:
  workflow_dispatch:
  schedule:
    # Run at 1 AM each day, so there is a `main`-branch baseline in the cache.
    - cron: '0 1 * * *'
env:
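  # The two profile overrides below keep debug info in the bench and release builds,
  # presumably so the perf profiles and flamegraphs generated later have usable symbols.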
  CARGO_PROFILE_BENCH_BUILD_OVERRIDE_DEBUG: true
  CARGO_PROFILE_RELEASE_DEBUG: true
  TOOLCHAIN: stable
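  # perf record options: sample at 4999 Hz and record frame-pointer call graphs.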
  PERF_OPT: record -F4999 --call-graph fp -g
  SCCACHE_CACHE_SIZE: 128G
  SCCACHE_DIRECT: true
  MTU: 1504 # https://github.com/microsoft/msquic/issues/4618
permissions:
  contents: read
jobs:
  bench:
    name: Benchmark
    runs-on: self-hosted # zizmor: ignore[self-hosted-runner]
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout neqo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false
      - name: Checkout msquic
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: microsoft/msquic
          ref: main
          path: msquic
          submodules: true
          persist-credentials: false
      - name: Checkout google/quiche
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: google/quiche
          ref: main
          path: gquiche
          submodules: true
          persist-credentials: false
      - name: Set PATH and environment
        run: |
          echo "/home/bench/.cargo/bin" >> "${GITHUB_PATH}"
      - name: Install Rust
        uses: ./.github/actions/rust
        with:
          version: $TOOLCHAIN
          tools: hyperfine, flamegraph
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Get minimum NSS version
        id: nss-version
        run: echo "minimum=$(cat neqo-crypto/min_version.txt)" >> "$GITHUB_OUTPUT"
      - name: Install NSS
        id: nss
        uses: ./.github/actions/nss
        with:
          minimum-version: ${{ steps.nss-version.outputs.minimum }}
      - name: Build neqo
        run: |
          cargo "+$TOOLCHAIN" bench --workspace --features bench --no-run
          # See https://github.com/flamegraph-rs/flamegraph for why we append to RUSTFLAGS here.
          export RUSTFLAGS="-C link-arg=-Wl,--no-rosegment -C force-frame-pointers=yes $RUSTFLAGS"
          cargo "+$TOOLCHAIN" build --locked --release --bin neqo-client --bin neqo-server
      - name: Build msquic
        run: |
          mkdir -p msquic/build
          cd msquic/build
          cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DQUIC_BUILD_TOOLS=1 -DQUIC_BUILD_PERF=1 ..
          cmake --build .
      - name: Build google/quiche
        run: |
          cd gquiche
          bazel build -c opt --sandbox_writable_path=/home/bench/.cache/sccache quiche:quic_server quiche:quic_client
      - name: Download cached main-branch results
        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
        with:
          path: |
            target/criterion
            hyperfine
          key: bench-results-${{ runner.name }}-${{ github.sha }}
          restore-keys: bench-results-${{ runner.name }}-
      - name: Move cached hyperfine results
        run: |
          mv hyperfine hyperfine-main || true
          mkdir -p hyperfine
      # Disable turboboost, hyperthreading and use performance governor.
      - name: Prepare machine
        run: sudo /root/bin/prep.sh
      - name: Run cargo bench
        run: |
          # Pin all but neqo-bin benchmarks to CPU 0. neqo-bin benchmarks run
          # both a client and a server, thus benefiting from multiple CPU cores.
          #
          # Run all benchmarks at elevated priority.
          taskset -c 0 nice -n -20 setarch --addr-no-randomize \
            cargo "+$TOOLCHAIN" bench --workspace --exclude neqo-bin --features bench -- --noplot | tee results.txt
          sudo ip link set dev lo mtu "$MTU"
          nice -n -20 setarch --addr-no-randomize \
            cargo "+$TOOLCHAIN" bench --package neqo-bin --features bench -- --noplot | tee -a results.txt
      # Compare various configurations of neqo against msquic and google/quiche, and gather perf data
      # during the hyperfine runs.
      - name: Compare neqo, msquic and google/quiche
        env:
          HOST: 127.0.0.1
          PORT: 4433
          SIZE: 33554432 # 32 MB
          RUNS: 30
        run: |
          TMP=$(mktemp -d)
          # Make a cert and key for msquic and google.
          openssl req -nodes -new -x509 -keyout "$TMP/key" -out "$TMP/cert" -subj "/CN=DOMAIN" 2>/dev/null
          # Make test files for msquic to serve.
          truncate -s "$SIZE" "$TMP/$SIZE"
          BIGSIZE=$(bc -l <<< "$SIZE * $RUNS")
          truncate -s "$BIGSIZE" "$TMP/$BIGSIZE"
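          # The $BIGSIZE file backs the separate, longer perf-profiled client run
          # further below, where the command swaps $SIZE for $BIGSIZE.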
          # Define the commands to run for each client and server.
          declare -A client_cmd=(
            ["neqo"]="target/release/neqo-client _cc _pacing --output-dir . _flags -Q 1 https://$HOST:$PORT/$SIZE"
            ["msquic"]="msquic/build/bin/Release/quicinterop -test:D -custom:$HOST -port:$PORT -urls:https://$HOST:$PORT/$SIZE"
            ["google"]="gquiche/bazel-bin/quiche/quic_client --disable_certificate_verification https://$HOST:$PORT/$SIZE > $SIZE"
          )
          declare -A server_cmd=(
            ["neqo"]="target/release/neqo-server _cc _pacing _flags -Q 1 $HOST:$PORT"
            ["msquic"]="msquic/build/bin/Release/quicinteropserver -root:$TMP -listen:$HOST -port:$PORT -file:$TMP/cert -key:$TMP/key -noexit"
            ["google"]="gquiche/bazel-bin/quiche/quic_server --generate_dynamic_responses --port $PORT --certificate_file $TMP/cert --key_file $TMP/key"
          )
          # Flags to pass to neqo when it runs against another implementation.
          declare -A neqo_flags=(
            ["neqo"]=""
            ["msquic"]="-a hq-interop"
            ["google"]=""
          )
          # Replace various placeholders in the commands with the actual values.
          # Also generate an extension to append to the file name.
          function transmogrify {
            CMD=$1
            local cc=$2
            local pacing=$3
            local flags=$4
            if [[ "$cc" != "" ]]; then
              CMD=${CMD//_cc/--cc $cc}
              EXT="-$cc"
            fi
            if [[ "$pacing" == "on" ]]; then
              CMD=${CMD//_pacing/}
              EXT="$EXT-pacing"
            else
              CMD=${CMD//_pacing/--no-pacing}
              EXT="$EXT-nopacing"
            fi
            CMD=${CMD//_flags/$flags}
          }
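          # For example, transmogrifying the neqo server command with cc=cubic, pacing=on
          # and empty flags yields roughly
          #   CMD="target/release/neqo-server --cc cubic -Q 1 $HOST:$PORT"
          # and EXT="-cubic-pacing", which becomes part of the result file names.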
          # A Welch's t-test to determine if a performance change is statistically significant.
          # We use this later to highlight significant changes in the results.
          cat <<EOF > welch.R
          args <- commandArgs(trailingOnly = TRUE)
          baseline <- scan(args[1], what = numeric())
          result <- scan(args[2], what = numeric())
          t_result <- t.test(baseline, result, alternative = "two.sided")
          p_value <- t_result\$p.value
          alpha <- 0.05
          quit(status = as.integer(p_value < alpha))
          EOF
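          # welch.R exits with a non-zero status when p < alpha, i.e. when the difference
          # between baseline and result is statistically significant; the `if !` check
          # further below relies on this.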
          # See https://github.com/microsoft/msquic/issues/4618#issuecomment-2422611592
          sudo ip link set dev lo mtu "$MTU"
          for server in neqo google msquic; do
            for client in neqo google msquic; do
              # Do not run msquic against google-quiche; the latter only supports H3.
              # Also, we are not interested in google as the server, or msquic as the client, except against themselves.
              if [[ "$client" == "google" && "$server" == "msquic" ||
                    "$client" == "msquic" && "$server" == "google" ||
                    "$client" != "google" && "$server" == "google" ||
                    "$client" == "msquic" && "$server" != "msquic" ]]; then
                continue
              fi
              # google and msquic don't let us configure the congestion control or pacing.
              if [[ "$client" != "neqo" && "$server" != "neqo" ]]; then
                cc_opt=("")
                pacing_opt=("")
              else
                cc_opt=("reno" "cubic")
                pacing_opt=("on" "")
              fi
              for cc in "${cc_opt[@]}"; do
                for pacing in "${pacing_opt[@]}"; do
                  # Make a tag string for this test, for the results. Highlight lines we care about.
                  if [[ "$client" == "neqo" && "$server" == "neqo" && "$cc" == "cubic" && "$pacing" == "on" ||
                        "$client" == "msquic" && "$server" == "msquic" ||
                        "$client" == "google" && "$server" == "google" ]]; then
                    TAG="**$client**,**$server**,${cc:+**}$cc${cc:+**},${pacing:+**}$pacing${pacing:+**}"
                  else
                    TAG="$client,$server,$cc,$pacing"
                  fi
                  echo "Running benchmarks for $TAG" | tee -a comparison.txt
                  transmogrify "${server_cmd[$server]}" "$cc" "$pacing" "${neqo_flags[$client]}"
                  FILENAME="$client-$server$EXT"
                  # shellcheck disable=SC2086
                  taskset -c 0 nice -n -20 setarch --addr-no-randomize \
                    perf $PERF_OPT -o "$FILENAME.server.perf" $CMD &
                  PID=$!
                  transmogrify "${client_cmd[$client]}" "$cc" "$pacing" "${neqo_flags[$server]}"
                  # shellcheck disable=SC2086
                  taskset -c 1 nice -n -20 setarch --addr-no-randomize \
                    hyperfine --command-name "$TAG" --time-unit millisecond \
                      --export-json "hyperfine/$FILENAME.json" \
                      --export-markdown "hyperfine/$FILENAME.md" \
                      --output null --warmup 5 --runs $RUNS --prepare "sleep 1" "$CMD" |
                    tee -a comparison.txt
                  echo >> comparison.txt
                  # Sanity check the size of the last retrieved file.
                  # google/quiche outputs the HTTP header, too, so we can't just check for -eq.
                  [ "$(wc -c <"$SIZE")" -ge "$SIZE" ] || exit 1
                  # Do a longer client run with perf separately. We used to just wrap the hyperfine command above in perf,
                  # but that uses different processes for the individual runs, and there is apparently no way to merge
                  # the perf profiles of those different runs.
                  CMD=${CMD//$SIZE/$BIGSIZE}
                  # shellcheck disable=SC2086
                  taskset -c 1 nice -n -20 setarch --addr-no-randomize \
                    perf $PERF_OPT -o "$FILENAME.client.perf" $CMD > /dev/null 2>&1
                  kill $PID
                  # Figure out if any performance difference to `main` is statistically relevant, and indicate that.
                  BASELINE="hyperfine-main/$FILENAME.json"
                  RESULT="hyperfine/$FILENAME.json"
                  BASELINE_MEAN=$(jq -r '.results[0].mean' "$BASELINE")
                  MEAN=$(jq -r '.results[0].mean' "$RESULT")
                  # Even though we tell hyperfine to use milliseconds, it still outputs in seconds when dumping to JSON.
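                  # DELTA is the absolute change in milliseconds; PERCENT is (assumed intent)
                  # the symmetric percent difference, i.e. |Δ| relative to the average of
                  # baseline and result.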
                  DELTA=$(bc -l <<< "($MEAN - $BASELINE_MEAN) * 1000")
                  PERCENT=$(bc -l <<< "sqrt(($MEAN - $BASELINE_MEAN)^2) / (($BASELINE_MEAN + $MEAN) / 2) * 100")
                  echo "Baseline: $BASELINE_MEAN, Result: $MEAN, Delta: $DELTA, Percent: $PERCENT"
                  # If a performance change is statistically significant, highlight it.
                  jq -r '.results[0].times[]' "$BASELINE" > baseline.txt
                  jq -r '.results[0].times[]' "$RESULT" > result.txt
                  if ! Rscript welch.R baseline.txt result.txt 2> /dev/null; then
                    if (( $(bc -l <<< "$DELTA > 0") )); then
                      echo "Performance has regressed: $BASELINE_MEAN -> $MEAN"
                      SYMBOL=":broken_heart:"
                      FORMAT='**'
                    else
                      echo "Performance has improved: $BASELINE_MEAN -> $MEAN"
                      SYMBOL=":green_heart:"
                      FORMAT='**'
                    fi
                  else
                    echo "No statistically significant change: $BASELINE_MEAN -> $MEAN"
                    # Reset the highlighting, so it doesn't leak over from a previous iteration.
                    SYMBOL=""
                    FORMAT=""
                  fi
                  {
                    grep -Ev '^\|(:| Command)' < "hyperfine/$FILENAME.md" | \
                      sed -E 's/`//g; s/,/ \| /g;' | cut -f1-8 -d\| | tr -d '\n'
                    printf "| %s %s%.1f%s | %s%.1f%%%s |\n" \
                      "$SYMBOL" "$FORMAT" "$DELTA" "$FORMAT" "$FORMAT" "$PERCENT" "$FORMAT"
                  } >> steps.md
                done
              done
            done
          done
          # Make a single results table.
          {
            echo "Transfer of $SIZE bytes over loopback, $RUNS runs. All unit-less numbers are in milliseconds."
            echo
            # shellcheck disable=SC2016
            echo '| Client | Server | CC | Pacing | Mean ± σ | Min | Max | Δ `main` [ms] | Δ `main` [%] |'
            echo '|:---|:---|:---|---|---:|---:|---:|---:|---:|'
            cat steps.md
          } > comparison.md
          rm -r "$TMP"
      # Re-enable turboboost, hyperthreading and use powersave governor.
      - name: Restore machine
        run: |
          sudo /root/bin/unprep.sh
          # In case the previous test failed:
          sudo ip link set dev lo mtu 65536
        if: ${{ success() || failure() || cancelled() }}
      - name: Post-process perf data
        run: |
          for f in *.perf; do
            # Convert for profiler.firefox.com
            perf script -i "$f" -F +pid > "$f.fx" &
            # Generate perf reports
            perf report -i "$f" --no-children --stdio > "$f.txt" &
            # Generate flamegraphs
            flamegraph --perfdata "$f" --palette rust -o "${f//.perf/.svg}" &
          done
          wait
          rm neqo.svg
      - name: Format results as Markdown
        id: results
        run: |
          {
            echo "### Benchmark results"
            echo
          } > results.md
          SHA=$(cat target/criterion/baseline-sha.txt || true)
          if [ -n "$SHA" ]; then
            {
              echo "Performance differences relative to $SHA."
              echo
            } | tee sha.md >> results.md
          fi
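          # Fold each criterion benchmark from results.txt into a collapsible <details>
          # block, bold the relevant numbers, and highlight significant changes.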
          sed -E -e 's/^ //gi' \
              -e 's/((change|time|thrpt):[^%]*% )([^%]*%)(.*)/\1<b>\3<\/b>\4/gi' results.txt |\
            perl -p -0777 -e 's/(.*?)\n(.*?)(((No change|Change within|Performance has).*?)(\nFound .*?)?)?\n\n/<details><summary>$1: $4<\/summary><pre>\n$2$6<\/pre><\/details>\n/gs' |\
            sed -E -e 's/(Performance has regressed.)/:broken_heart: <b>\1<\/b>/gi' \
                -e 's/(Performance has improved.)/:green_heart: <b>\1<\/b>/gi' \
                -e 's/^ +((<\/pre>|Found).*)/\1/gi' \
                -e 's/^<details>(.*Performance has.*)/<details open>\1/gi' >> results.md
          {
            echo
            echo "### Client/server transfer results"
            SHA=$(cat target/criterion/baseline-sha.txt || true)
            if [ -n "$SHA" ]; then
              cat sha.md >> results.md
            fi
            cat comparison.md
          } >> results.md
          cat results.md > "$GITHUB_STEP_SUMMARY"
      - name: Remember main-branch SHA
        if: ${{ github.ref == 'refs/heads/main' }}
        run: echo "${{ github.sha }}" > target/criterion/baseline-sha.txt
      - name: Cache main-branch results
        if: ${{ github.ref == 'refs/heads/main' }}
        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
        with:
          path: |
            target/criterion
            hyperfine
          key: bench-results-${{ runner.name }}-${{ github.sha }}
      - name: Export perf data
        id: export
        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
        with:
          name: ${{ github.event.repository.name }}-${{ github.sha }}
          path: |
            *.svg
            *.perf
            *.perf.fx
            *.txt
            *.md
            results.*
            target/criterion
            hyperfine
          compression-level: 9
      - name: Export PR comment data
        uses: ./.github/actions/pr-comment-data-export
        with:
          name: ${{ github.workflow }}
          contents: results.md
          log-url: ${{ steps.export.outputs.artifact-url }}