Support multiple results comparison in benchmark scripts #14196

Open · wants to merge 4 commits into main
benchmarks/bench.sh (53 changes: 25 additions & 28 deletions)
@@ -45,7 +45,7 @@ Orchestrates running benchmarks against DataFusion checkouts
Usage:
$0 data [benchmark]
$0 run [benchmark]
-$0 compare <branch1> <branch2>
+$0 compare <baseline> <branch1> <branch2> <...>
$0 venv

**********
@@ -125,7 +125,7 @@ done
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
COMMAND=${1:-"${COMMAND}"}
ARG2=$2
-ARG3=$3
+ARGS=${*:1}

# Do what is requested
main() {
@@ -296,7 +296,7 @@ main() {
        echo "Done"
        ;;
    compare)
-       compare_benchmarks "$ARG2" "$ARG3"
+       compare_benchmarks ${ARGS[@]}
        ;;
    venv)
        setup_venv
@@ -544,13 +544,13 @@ data_imdb() {

        # Extract the dataset
        tar -xzvf "${imdb_temp_gz}" -C "${imdb_dir}"
-       $CARGO_COMMAND --bin imdb -- convert --input ${imdb_dir} --output ${imdb_dir} --format parquet
+       $CARGO_COMMAND --bin imdb -- convert --input "${imdb_dir}" --output "${imdb_dir}" --format parquet
    else
        echo "IMDB.tgz already exists."

        # Extract the dataset
        tar -xzvf "${imdb_temp_gz}" -C "${imdb_dir}"
-       $CARGO_COMMAND --bin imdb -- convert --input ${imdb_dir} --output ${imdb_dir} --format parquet
+       $CARGO_COMMAND --bin imdb -- convert --input "${imdb_dir}" --output "${imdb_dir}" --format parquet
    fi
    echo "IMDB dataset downloaded and extracted."
else
@@ -715,35 +715,32 @@ run_sort_tpch() {


compare_benchmarks() {
-    BASE_RESULTS_DIR="${SCRIPT_DIR}/results"
-    BRANCH1="$1"
-    BRANCH2="$2"
-    if [ -z "$BRANCH1" ] ; then
-        echo "<branch1> not specified. Available branches:"
-        ls -1 "${BASE_RESULTS_DIR}"
-        exit 1
-    fi
+    RESULTS_DIR="${SCRIPT_DIR}/results"

-    if [ -z "$BRANCH2" ] ; then
-        echo "<branch2> not specified"
-        ls -1 "${BASE_RESULTS_DIR}"
-        exit 1
+    if [ "$#" -lt 3 ]; then
+        echo "Missing $((3 - $#)) arguments. Available branches:"
+        ls -1 "${RESULTS_DIR}"
+        exit 1
    fi

-    echo "Comparing ${BRANCH1} and ${BRANCH2}"
-    for RESULTS_FILE1 in "${BASE_RESULTS_DIR}/${BRANCH1}"/*.json ; do
-        BENCH=$(basename "${RESULTS_FILE1}")
-        RESULTS_FILE2="${BASE_RESULTS_DIR}/${BRANCH2}/${BENCH}"
-        if test -f "${RESULTS_FILE2}" ; then
-            echo "--------------------"
-            echo "Benchmark ${BENCH}"
-            echo "--------------------"
-            PATH=$VIRTUAL_ENV/bin:$PATH python3 "${SCRIPT_DIR}"/compare.py "${RESULTS_FILE1}" "${RESULTS_FILE2}"
+    BASE_BRANCH=$2
+    BRANCHES=("${@:3}")
+    echo "Comparing ${BRANCHES[*]} to $BASE_BRANCH"
+
+    for BASE_FILE in "$RESULTS_DIR/$BASE_BRANCH"/*.json ; do
+        BENCH=$(basename "${BASE_FILE}")
+        AVAILABLE_FILES=()
+        for OTHER in "${BRANCHES[@]}"; do
+            if [ -f "${RESULTS_DIR}/${OTHER}/${BENCH}" ]; then
+                AVAILABLE_FILES+=("$RESULTS_DIR/$OTHER/$BENCH")
+            fi
+        done
+        if [ ${#AVAILABLE_FILES[@]} -eq 0 ]; then
+            echo "Note: Skipping ${BASE_FILE} as no other result available for comparison."
        else
-            echo "Note: Skipping ${RESULTS_FILE1} as ${RESULTS_FILE2} does not exist"
+            PATH=$VIRTUAL_ENV/bin:$PATH python3 "${SCRIPT_DIR}"/compare.py "${BASE_FILE}" ${AVAILABLE_FILES[@]}
        fi
    done

}

setup_venv() {
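With this change, compare takes a baseline branch followed by any number of comparison branches, each naming a result directory under benchmarks/results. For example (hypothetical branch names), ./bench.sh compare main my-branch other-branch reports my-branch and other-branch against the results recorded for main, and skips any benchmark for which no comparison result exists.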
benchmarks/compare.py (121 changes: 76 additions & 45 deletions)
@@ -104,76 +104,107 @@ def load_from_file(cls, path: Path) -> BenchmarkRun:

def compare(
    baseline_path: Path,
-    comparison_path: Path,
+    comparison_paths: list[Path],
    noise_threshold: float,
) -> None:
    baseline = BenchmarkRun.load_from_file(baseline_path)
-    comparison = BenchmarkRun.load_from_file(comparison_path)
+    comparisons = list(map(lambda x: BenchmarkRun.load_from_file(x), comparison_paths))

    console = Console()

    # use basename as the column names
    baseline_header = baseline_path.parent.stem
-    comparison_header = comparison_path.parent.stem

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Query", style="dim", width=12)
    table.add_column(baseline_header, justify="right", style="dim")
-    table.add_column(comparison_header, justify="right", style="dim")
-    table.add_column("Change", justify="right", style="dim")
-
-    faster_count = 0
-    slower_count = 0
-    no_change_count = 0
-    total_baseline_time = 0
-    total_comparison_time = 0
-
-    for baseline_result, comparison_result in zip(baseline.queries, comparison.queries):
-        assert baseline_result.query == comparison_result.query
+    for comparison_path in comparison_paths:
+        comparison_header = comparison_path.parent.stem
+        table.add_column(comparison_header, justify="right", style="dim")
+        table.add_column("Change on " + comparison_header, justify="right", style="dim")

+    def to_change_text(ratio: float) -> str:
+        if (1.0 - noise_threshold) <= ratio <= (1.0 + noise_threshold):
+            return "no change"
+        elif ratio < 1.0:
+            return f"+{(1 / ratio):.2f}x faster"
+        else:
+            return f"{ratio:.2f}x slower"

-        total_baseline_time += baseline_result.execution_time
-        total_comparison_time += comparison_result.execution_time
+    for baseline_result, comparison_results in zip(
+        baseline.queries, zip(*[run.queries for run in comparisons])
+    ):
+        for result in comparison_results:
+            assert baseline_result.query == result.query

-        change = comparison_result.execution_time / baseline_result.execution_time
+        execution_time_texts = list(
+            map(lambda x: f"{x.execution_time:.2f}ms", comparison_results)
+        )

-        if (1.0 - noise_threshold) <= change <= (1.0 + noise_threshold):
-            change_text = "no change"
-            no_change_count += 1
-        elif change < 1.0:
-            change_text = f"+{(1 / change):.2f}x faster"
-            faster_count += 1
-        else:
-            change_text = f"{change:.2f}x slower"
-            slower_count += 1
+        changes = map(
+            lambda x: x.execution_time / baseline_result.execution_time,
+            comparison_results,
+        )
+        change_texts = list(map(to_change_text, changes))

        table.add_row(
-            f"Q{baseline_result.query}",
-            f"{baseline_result.execution_time:.2f}ms",
-            f"{comparison_result.execution_time:.2f}ms",
-            change_text,
+            *(
+                [f"Q{baseline_result.query}", f"{baseline_result.execution_time:.2f}ms"]
+                + [
+                    x
+                    for i in range(len(change_texts))
+                    for x in (execution_time_texts[i], change_texts[i])
+                ]
+            )
        )

    console.print(table)

-    # Calculate averages
-    avg_baseline_time = total_baseline_time / len(baseline.queries)
-    avg_comparison_time = total_comparison_time / len(comparison.queries)
-
    # Summary table
    summary_table = Table(show_header=True, header_style="bold magenta")
    summary_table.add_column("Benchmark Summary", justify="left", style="dim")
    summary_table.add_column("", justify="right", style="dim")

-    summary_table.add_row(f"Total Time ({baseline_header})", f"{total_baseline_time:.2f}ms")
-    summary_table.add_row(f"Total Time ({comparison_header})", f"{total_comparison_time:.2f}ms")
-    summary_table.add_row(f"Average Time ({baseline_header})", f"{avg_baseline_time:.2f}ms")
-    summary_table.add_row(f"Average Time ({comparison_header})", f"{avg_comparison_time:.2f}ms")
-    summary_table.add_row("Queries Faster", str(faster_count))
-    summary_table.add_row("Queries Slower", str(slower_count))
-    summary_table.add_row("Queries with No Change", str(no_change_count))
+    # baseline info
+    total_baseline_time = sum(map(lambda x: x.execution_time, baseline.queries))
+    avg_baseline_time = total_baseline_time / len(baseline.queries)

+    summary_table.add_row(
+        f"Total Time ({baseline_header})", f"{total_baseline_time:.2f}ms"
+    )
+    summary_table.add_row(
+        f"Average Time ({baseline_header})", f"{avg_baseline_time:.2f}ms"
+    )
+
+    # info for each comparison
+    for path, result in zip(comparison_paths, comparisons):
+        header = path.parent.stem
+
+        total_time = sum(map(lambda x: x.execution_time, result.queries))
+        average_time = total_time / len(result.queries)
+
+        time_ratios = list(
+            map(
+                lambda x: x[0].execution_time / x[1].execution_time - 1,
+                zip(baseline.queries, result.queries),
+            )
+        )
+
+        faster_count = len(list(filter(lambda x: x > noise_threshold, time_ratios)))
+        slower_count = len(list(filter(lambda x: x < -noise_threshold, time_ratios)))
+        no_change_count = len(time_ratios) - faster_count - slower_count
+
+        summary_table.add_row(f"Total Time ({header})", f"{total_time:.2f}ms")
+        summary_table.add_row(f"Average Time ({header})", f"{average_time:.2f}ms")
+
+        summary_table.add_row(f"Queries Faster ({header})", str(faster_count))
+        summary_table.add_row(f"Queries Slower ({header})", str(slower_count))
+        summary_table.add_row(
+            f"Queries with No Change ({header})", str(no_change_count)
+        )

    console.print(summary_table)


def main() -> None:
    parser = ArgumentParser()
    compare_parser = parser
@@ -183,8 +214,9 @@ def main() -> None:
        help="Path to the baseline summary file.",
    )
    compare_parser.add_argument(
-        "comparison_path",
+        "comparison_paths",
        type=Path,
+        nargs="+",
        help="Path to the comparison summary file.",
    )
    compare_parser.add_argument(
@@ -196,8 +228,7 @@

    options = parser.parse_args()

-    compare(options.baseline_path, options.comparison_path, options.noise_threshold)
-
+    compare(options.baseline_path, options.comparison_paths, options.noise_threshold)


if __name__ == "__main__":
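For reference, a minimal runnable sketch of the per-row logic the new compare() implements. The timings and branch names below are made up for illustration and are not part of the PR; the helper mirrors the diff's to_change_text, and zip(*runs) is the same transpose trick the new loop uses to visit one query's result from every run at once.

# Illustrative data only: (query, execution time in ms) per run.
noise_threshold = 0.05

baseline = [("1", 100.0), ("2", 250.0)]
runs = [
    [("1", 80.0), ("2", 260.0)],   # e.g. results for a hypothetical branch-a
    [("1", 120.0), ("2", 250.0)],  # e.g. results for a hypothetical branch-b
]

def to_change_text(ratio: float) -> str:
    # Same classification rule as compare.py: within the noise band,
    # report "no change"; otherwise report a faster/slower multiple.
    if (1.0 - noise_threshold) <= ratio <= (1.0 + noise_threshold):
        return "no change"
    elif ratio < 1.0:
        return f"+{(1 / ratio):.2f}x faster"
    else:
        return f"{ratio:.2f}x slower"

# zip(*runs) transposes the per-run lists so each iteration sees one
# query's result from every comparison run, as the new compare() loop does.
for (query, base_ms), results in zip(baseline, zip(*runs)):
    row = [f"Q{query}", f"{base_ms:.2f}ms"]
    for other_query, other_ms in results:
        assert query == other_query  # runs must cover the same queries
        row += [f"{other_ms:.2f}ms", to_change_text(other_ms / base_ms)]
    print(" | ".join(row))

# Prints:
# Q1 | 100.00ms | 80.00ms | +1.25x faster | 120.00ms | 1.20x slower
# Q2 | 250.00ms | 260.00ms | no change | 250.00ms | no change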