Support multiple results comparison in benchmark scripts #14196

Open · wants to merge 4 commits into main
benchmarks/bench.sh (53 changes: 25 additions & 28 deletions)
@@ -45,7 +45,7 @@ Orchestrates running benchmarks against DataFusion checkouts
Usage:
$0 data [benchmark]
$0 run [benchmark]
-$0 compare <branch1> <branch2>
+$0 compare <baseline> <branch1> <branch2> <...>
$0 venv

**********
@@ -125,7 +125,7 @@ done
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
COMMAND=${1:-"${COMMAND}"}
ARG2=$2
-ARG3=$3
+ARGS=${*:1}

# Do what is requested
main() {
@@ -296,7 +296,7 @@ main() {
        echo "Done"
        ;;
    compare)
-       compare_benchmarks "$ARG2" "$ARG3"
+       compare_benchmarks ${ARGS[@]}
        ;;
    venv)
        setup_venv
@@ -544,13 +544,13 @@ data_imdb() {

        # Extract the dataset
        tar -xzvf "${imdb_temp_gz}" -C "${imdb_dir}"
-       $CARGO_COMMAND --bin imdb -- convert --input ${imdb_dir} --output ${imdb_dir} --format parquet
+       $CARGO_COMMAND --bin imdb -- convert --input "${imdb_dir}" --output "${imdb_dir}" --format parquet
    else
        echo "IMDB.tgz already exists."

        # Extract the dataset
        tar -xzvf "${imdb_temp_gz}" -C "${imdb_dir}"
-       $CARGO_COMMAND --bin imdb -- convert --input ${imdb_dir} --output ${imdb_dir} --format parquet
+       $CARGO_COMMAND --bin imdb -- convert --input "${imdb_dir}" --output "${imdb_dir}" --format parquet
    fi
    echo "IMDB dataset downloaded and extracted."
else
@@ -715,35 +715,32 @@ run_sort_tpch() {


compare_benchmarks() {
-    BASE_RESULTS_DIR="${SCRIPT_DIR}/results"
-    BRANCH1="$1"
-    BRANCH2="$2"
-    if [ -z "$BRANCH1" ] ; then
-        echo "<branch1> not specified. Available branches:"
-        ls -1 "${BASE_RESULTS_DIR}"
-        exit 1
-    fi
+    RESULTS_DIR="${SCRIPT_DIR}/results"

-    if [ -z "$BRANCH2" ] ; then
-        echo "<branch2> not specified"
-        ls -1 "${BASE_RESULTS_DIR}"
-        exit 1
+    if [ "$#" -lt 3 ]; then
+        echo "Missing $((3 - $#)) arguments. Available branches:"
+        ls -1 "${RESULTS_DIR}"
+        exit 1
    fi

-    echo "Comparing ${BRANCH1} and ${BRANCH2}"
-    for RESULTS_FILE1 in "${BASE_RESULTS_DIR}/${BRANCH1}"/*.json ; do
-        BENCH=$(basename "${RESULTS_FILE1}")
-        RESULTS_FILE2="${BASE_RESULTS_DIR}/${BRANCH2}/${BENCH}"
-        if test -f "${RESULTS_FILE2}" ; then
-            echo "--------------------"
-            echo "Benchmark ${BENCH}"
-            echo "--------------------"
-            PATH=$VIRTUAL_ENV/bin:$PATH python3 "${SCRIPT_DIR}"/compare.py "${RESULTS_FILE1}" "${RESULTS_FILE2}"
+    BASE_BRANCH=$2
+    BRANCHES=("${@:3}")
+    echo "Comparing ${BRANCHES[*]} to $BASE_BRANCH"
+
+    for BASE_FILE in "$RESULTS_DIR/$BASE_BRANCH"/*.json ; do
+        BENCH=$(basename "${BASE_FILE}")
+        AVAILABLE_FILES=()
+        for OTHER in "${BRANCHES[@]}"; do
+            if [ -f "${RESULTS_DIR}/${OTHER}/${BENCH}" ]; then
+                AVAILABLE_FILES+=("$RESULTS_DIR/$OTHER/$BENCH")
+            fi
+        done
+        if [ ${#AVAILABLE_FILES[@]} -eq 0 ]; then
+            echo "Note: Skipping ${BASE_FILE} as no other result available for comparison."
        else
-            echo "Note: Skipping ${RESULTS_FILE1} as ${RESULTS_FILE2} does not exist"
+            PATH=$VIRTUAL_ENV/bin:$PATH python3 "${SCRIPT_DIR}"/compare.py "${BASE_FILE}" ${AVAILABLE_FILES[@]}
        fi
    done

}

setup_venv() {
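With this change, compare takes a baseline branch followed by any number of comparison branches, each naming a result directory under benchmarks/results. For example (hypothetical branch names), ./bench.sh compare main my-branch other-branch reports my-branch and other-branch against the results recorded for main, and skips any benchmark for which no comparison result exists.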
benchmarks/compare.py (121 changes: 76 additions & 45 deletions)
@@ -104,76 +104,107 @@ def load_from_file(cls, path: Path) -> BenchmarkRun:

def compare(
    baseline_path: Path,
-    comparison_path: Path,
+    comparison_paths: list[Path],
    noise_threshold: float,
) -> None:
    baseline = BenchmarkRun.load_from_file(baseline_path)
-    comparison = BenchmarkRun.load_from_file(comparison_path)
+    comparisons = list(map(lambda x: BenchmarkRun.load_from_file(x), comparison_paths))

    console = Console()

    # use basename as the column names
    baseline_header = baseline_path.parent.stem
-    comparison_header = comparison_path.parent.stem

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Query", style="dim", width=12)
    table.add_column(baseline_header, justify="right", style="dim")
-    table.add_column(comparison_header, justify="right", style="dim")
-    table.add_column("Change", justify="right", style="dim")
-
-    faster_count = 0
-    slower_count = 0
-    no_change_count = 0
-    total_baseline_time = 0
-    total_comparison_time = 0
-
-    for baseline_result, comparison_result in zip(baseline.queries, comparison.queries):
-        assert baseline_result.query == comparison_result.query
+    for comparison_path in comparison_paths:
+        comparison_header = comparison_path.parent.stem
+        table.add_column(comparison_header, justify="right", style="dim")
+        table.add_column("Change on " + comparison_header, justify="right", style="dim")

+    def to_change_text(ratio: float) -> str:
+        if (1.0 - noise_threshold) <= ratio <= (1.0 + noise_threshold):
+            return "no change"
+        elif ratio < 1.0:
+            return f"+{(1 / ratio):.2f}x faster"
+        else:
+            return f"{ratio:.2f}x slower"

-        total_baseline_time += baseline_result.execution_time
-        total_comparison_time += comparison_result.execution_time
+    for baseline_result, comparison_results in zip(
+        baseline.queries, zip(*[run.queries for run in comparisons])
+    ):
+        for result in comparison_results:
+            assert baseline_result.query == result.query

-        change = comparison_result.execution_time / baseline_result.execution_time
+        execution_time_texts = list(
+            map(lambda x: f"{x.execution_time:.2f}ms", comparison_results)
+        )

-        if (1.0 - noise_threshold) <= change <= (1.0 + noise_threshold):
-            change_text = "no change"
-            no_change_count += 1
-        elif change < 1.0:
-            change_text = f"+{(1 / change):.2f}x faster"
-            faster_count += 1
-        else:
-            change_text = f"{change:.2f}x slower"
-            slower_count += 1
+        changes = map(
+            lambda x: x.execution_time / baseline_result.execution_time,
+            comparison_results,
+        )
+        change_texts = list(map(to_change_text, changes))

        table.add_row(
-            f"Q{baseline_result.query}",
-            f"{baseline_result.execution_time:.2f}ms",
-            f"{comparison_result.execution_time:.2f}ms",
-            change_text,
+            *(
+                [f"Q{baseline_result.query}", f"{baseline_result.execution_time:.2f}ms"]
+                + [
+                    x
+                    for i in range(len(change_texts))
+                    for x in (execution_time_texts[i], change_texts[i])
+                ]
+            )
        )

    console.print(table)

-    # Calculate averages
-    avg_baseline_time = total_baseline_time / len(baseline.queries)
-    avg_comparison_time = total_comparison_time / len(comparison.queries)
-
    # Summary table
    summary_table = Table(show_header=True, header_style="bold magenta")
    summary_table.add_column("Benchmark Summary", justify="left", style="dim")
    summary_table.add_column("", justify="right", style="dim")

-    summary_table.add_row(f"Total Time ({baseline_header})", f"{total_baseline_time:.2f}ms")
-    summary_table.add_row(f"Total Time ({comparison_header})", f"{total_comparison_time:.2f}ms")
-    summary_table.add_row(f"Average Time ({baseline_header})", f"{avg_baseline_time:.2f}ms")
-    summary_table.add_row(f"Average Time ({comparison_header})", f"{avg_comparison_time:.2f}ms")
-    summary_table.add_row("Queries Faster", str(faster_count))
-    summary_table.add_row("Queries Slower", str(slower_count))
-    summary_table.add_row("Queries with No Change", str(no_change_count))
+    # baseline info
+    total_baseline_time = sum(map(lambda x: x.execution_time, baseline.queries))
+    avg_baseline_time = total_baseline_time / len(baseline.queries)

+    summary_table.add_row(
+        f"Total Time ({baseline_header})", f"{total_baseline_time:.2f}ms"
+    )
+    summary_table.add_row(
+        f"Average Time ({baseline_header})", f"{avg_baseline_time:.2f}ms"
+    )
+
+    # info for each comparison
+    for path, result in zip(comparison_paths, comparisons):
+        header = path.parent.stem
+
+        total_time = sum(map(lambda x: x.execution_time, result.queries))
+        average_time = total_time / len(result.queries)
+
+        time_ratios = list(
+            map(
+                lambda x: x[0].execution_time / x[1].execution_time - 1,
+                zip(baseline.queries, result.queries),
+            )
+        )
+
+        faster_count = len(list(filter(lambda x: x > noise_threshold, time_ratios)))
+        slower_count = len(list(filter(lambda x: x < -noise_threshold, time_ratios)))
+        no_change_count = len(time_ratios) - faster_count - slower_count
+
+        summary_table.add_row(f"Total Time ({header})", f"{total_time:.2f}ms")
+        summary_table.add_row(f"Average Time ({header})", f"{average_time:.2f}ms")
+
+        summary_table.add_row(f"Queries Faster ({header})", str(faster_count))
+        summary_table.add_row(f"Queries Slower ({header})", str(slower_count))
+        summary_table.add_row(
+            f"Queries with No Change ({header})", str(no_change_count)
+        )

    console.print(summary_table)


def main() -> None:
    parser = ArgumentParser()
    compare_parser = parser
@@ -183,8 +214,9 @@ def main() -> None:
        help="Path to the baseline summary file.",
    )
    compare_parser.add_argument(
-        "comparison_path",
+        "comparison_paths",
        type=Path,
+        nargs="+",
        help="Path to the comparison summary file.",
    )
    compare_parser.add_argument(
@@ -196,8 +228,7 @@

    options = parser.parse_args()

-    compare(options.baseline_path, options.comparison_path, options.noise_threshold)
-
+    compare(options.baseline_path, options.comparison_paths, options.noise_threshold)


if __name__ == "__main__":
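For reference, a minimal runnable sketch of the per-row logic the new compare() implements. The timings and branch names below are made up for illustration and are not part of the PR; the helper mirrors the diff's to_change_text, and zip(*runs) is the same transpose trick the new loop uses to visit one query's result from every run at once.

# Illustrative data only: (query, execution time in ms) per run.
noise_threshold = 0.05

baseline = [("1", 100.0), ("2", 250.0)]
runs = [
    [("1", 80.0), ("2", 260.0)],   # e.g. results for a hypothetical branch-a
    [("1", 120.0), ("2", 250.0)],  # e.g. results for a hypothetical branch-b
]

def to_change_text(ratio: float) -> str:
    # Same classification rule as compare.py: within the noise band,
    # report "no change"; otherwise report a faster/slower multiple.
    if (1.0 - noise_threshold) <= ratio <= (1.0 + noise_threshold):
        return "no change"
    elif ratio < 1.0:
        return f"+{(1 / ratio):.2f}x faster"
    else:
        return f"{ratio:.2f}x slower"

# zip(*runs) transposes the per-run lists so each iteration sees one
# query's result from every comparison run, as the new compare() loop does.
for (query, base_ms), results in zip(baseline, zip(*runs)):
    row = [f"Q{query}", f"{base_ms:.2f}ms"]
    for other_query, other_ms in results:
        assert query == other_query  # runs must cover the same queries
        row += [f"{other_ms:.2f}ms", to_change_text(other_ms / base_ms)]
    print(" | ".join(row))

# Prints:
# Q1 | 100.00ms | 80.00ms | +1.25x faster | 120.00ms | 1.20x slower
# Q2 | 250.00ms | 260.00ms | no change | 250.00ms | no change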