improve benchmarks automation
This patch:
 - uses the geometric mean instead of the arithmetic mean when calculating the summary
 - adds an option to run each benchmark several times and pick the median value (see the sketch below)
 - adds a timeout for benchmarks, set to 10 minutes by default
 - adds an option to filter benchmarks by name
 - adds an option to pick a specific compiler commit to test with
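Taken together, the iteration and timeout changes mean each benchmark is executed several times, runs that fail or time out are skipped, and the median of the remaining values is reported. A minimal sketch of that flow (illustrative only; `run_once` and its return convention are assumptions, not code from this patch):

```python
def median_result(run_once, iterations=5):
    """Run a benchmark `iterations` times; return the median value, or None if no run succeeded."""
    values = []
    for _ in range(iterations):
        value = run_once()           # assume None means the run failed or hit the timeout
        if value is not None:
            values.append(value)
    if not values:
        return None
    values.sort()
    return values[len(values) // 2]  # median (the upper middle value for an even count)
```

Using the median rather than the mean keeps one noisy or stalled run from skewing the reported number.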
pbalcer committed Jul 26, 2024
1 parent 2baf095 commit 615e612
Showing 11 changed files with 103 additions and 56 deletions.
21 changes: 18 additions & 3 deletions .github/workflows/benchmarks_compute.yml
@@ -34,15 +34,23 @@ on:
type: string
required: false
default: ''
sycl_repo:
description: 'Compiler repo'
type: string
required: true
default: 'intel/llvm'
sycl_commit:
description: 'Compiler commit'
type: string
required: false
default: ''

permissions:
contents: read
pull-requests: write

jobs:
e2e-build-hw:
# Run only on upstream; forks will not have the HW
# if: github.repository == 'oneapi-src/unified-runtime'
name: Build SYCL, UR, run Compute Benchmarks
strategy:
matrix:
@@ -105,12 +113,19 @@ jobs:
- name: Checkout SYCL
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
repository: intel/llvm
repository: ${{inputs.sycl_repo}}
ref: refs/heads/sycl
path: sycl-repo
fetch-depth: 1
fetch-tags: false

- name: Fetch specific SYCL commit
if: inputs.sycl_commit != ''
working-directory: ./sycl-repo
run: |
git fetch --depth=1 origin ${{ inputs.sycl_commit }}
git checkout ${{ inputs.sycl_commit }}
- name: Set CUDA env vars
if: matrix.adapter.str_name == 'cuda'
run: |
3 changes: 3 additions & 0 deletions scripts/benchmarks/benches/SobelFilter.py
@@ -12,7 +12,10 @@
class SobelFilter(VelocityBase):
def __init__(self, vb: VelocityBench):
super().__init__("sobel_filter", "sobel_filter", vb)

def download_deps(self):
self.download_untar("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz")
return

def name(self):
return "Velocity-Bench Sobel Filter"
18 changes: 7 additions & 11 deletions scripts/benchmarks/benches/api_overhead.py
@@ -11,12 +11,15 @@
from .result import Result
from .options import options

## TODO: create a generic ComputeBenchmarks class that specific scenarios can inherit
class APIOverheadSYCL(Benchmark):
def __init__(self, directory):
def __init__(self, ioq, directory):
self.ioq = ioq
super().__init__(directory)

def name(self):
return "api_overhead_benchmark_sycl, mean execution time per 10 kernels"
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_sycl {order}, mean execution time per 10 kernels"

def unit(self):
return "μs"
@@ -39,11 +42,11 @@ def setup(self):
run(f"cmake --build {build_path} -j", add_sycl=True)
self.benchmark_bin = f"{build_path}/bin/api_overhead_benchmark_sycl"

def run_internal(self, ioq, env_vars):
def run(self, env_vars) -> Result:
command = [
f"{self.benchmark_bin}",
"--test=SubmitKernel",
f"--Ioq={ioq}",
f"--Ioq={self.ioq}",
"--DiscardEvents=0",
"--MeasureCompletion=0",
"--iterations=100000",
@@ -57,13 +60,6 @@ def run_internal(self, ioq, env_vars):
(label, mean) = self.parse_output(result)
return Result(label=label, value=mean, command=command, env=env_vars, stdout=result)

def run(self, env_vars) -> list[Result]:
results = []
for ioq in [0, 1]:
results.append(self.run_internal(ioq, env_vars))

return results

def parse_output(self, output):
csv_file = io.StringIO(output)
reader = csv.reader(csv_file)
2 changes: 1 addition & 1 deletion scripts/benchmarks/benches/base.py
@@ -61,7 +61,7 @@ def unit(self):
def setup(self):
raise NotImplementedError()

def run(self, env_vars):
def run(self, env_vars) -> Result:
raise NotImplementedError()

def teardown(self):
2 changes: 2 additions & 0 deletions scripts/benchmarks/benches/easywave.py
@@ -14,6 +14,8 @@
class Easywave(VelocityBase):
def __init__(self, vb: VelocityBench):
super().__init__("easywave", "easyWave_sycl", vb)

def download_deps(self):
self.download_untar("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz")

def name(self):
2 changes: 2 additions & 0 deletions scripts/benchmarks/benches/options.py
@@ -5,6 +5,8 @@ class Options:
sycl: str = ""
rebuild: bool = True
benchmark_cwd: str = "INVALID"
timeout: float = 600
iterations: int = 5

options = Options()

4 changes: 2 additions & 2 deletions scripts/benchmarks/benches/quicksilver.py
@@ -15,10 +15,10 @@ def __init__(self, vb: VelocityBench):
super().__init__("QuickSilver", "qs", vb)
self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering")

def run(self, env_vars) -> list[Result]:
def run(self, env_vars) -> Result:
# TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0':
return []
return None

return super().run(env_vars)

9 changes: 7 additions & 2 deletions scripts/benchmarks/benches/velocity.py
@@ -24,7 +24,12 @@ def __init__(self, name: str, bin_name: str, vb: VelocityBench):
self.bin_name = bin_name
self.code_path = os.path.join(self.vb.repo_path, self.bench_name, 'SYCL')

def download_deps(self):
return

def setup(self):
self.download_deps()

build_path = self.create_build_path(self.bench_name)

configure_command = [
@@ -47,7 +52,7 @@ def extra_env_vars(self) -> dict:
def parse_output(self, stdout: str) -> float:
raise NotImplementedError()

def run(self, env_vars) -> list[Result]:
def run(self, env_vars) -> Result:
env_vars.update(self.extra_env_vars())

command = [
@@ -57,7 +62,7 @@ def run(self, env_vars) -> list[Result]:

result = self.run_bench(command, env_vars)

return [Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)]
return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)

def teardown(self):
return
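The new `download_deps` hook is a small template-method pattern: `VelocityBase.setup` always calls it, the base implementation does nothing, and benchmarks with data dependencies (SobelFilter, Easywave) override it. A stripped-down sketch of the pattern, with the download and build details replaced by prints:

```python
class VelocityBase:
    def download_deps(self):
        # Default hook: most benchmarks have nothing to download.
        return

    def setup(self):
        self.download_deps()   # data is fetched before configure/build
        print("configuring and building...")

class SobelFilter(VelocityBase):
    def download_deps(self):
        # Benchmarks with input data override the hook.
        print("downloading sobel_filter_data.tgz...")

SobelFilter().setup()   # downloads first, then builds
```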
59 changes: 42 additions & 17 deletions scripts/benchmarks/main.py
@@ -5,7 +5,6 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os
from utils.utils import prepare_workdir, load_benchmark_results, save_benchmark_results;
from benches.api_overhead import APIOverheadSYCL
from benches.hashtable import Hashtable
@@ -18,22 +17,19 @@
from benches.options import options
from output import generate_markdown
import argparse
import re

# Update this if you are changing the layout of the results files
INTERNAL_WORKDIR_VERSION = '1.0'

def main(directory, additional_env_vars, save_name, compare_names):
variants = [
({'UR_L0_USE_IMMEDIATE_COMMANDLISTS': '0'}, "Imm-CmdLists-OFF"),
({'UR_L0_USE_IMMEDIATE_COMMANDLISTS': '1'}, ""),
]
INTERNAL_WORKDIR_VERSION = '1.4'

def main(directory, additional_env_vars, save_name, compare_names, filter):
prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)

vb = VelocityBench(directory)

benchmarks = [
APIOverheadSYCL(directory),
APIOverheadSYCL(0, directory),
APIOverheadSYCL(1, directory),
Hashtable(vb),
Bitcracker(vb),
#CudaSift(vb), TODO: the benchmark is passing, but is outputting "Failed to allocate device data"
@@ -42,22 +38,44 @@ def main(directory, additional_env_vars, save_name, compare_names):
SobelFilter(vb)
]

if filter:
benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())]

for benchmark in benchmarks:
print(f"setting up {benchmark.name()}... ", end='', flush=True)
benchmark.setup()
print("complete.")

results = []
for benchmark in benchmarks:
for env_vars, extra_label in variants:
merged_env_vars = {**env_vars, **additional_env_vars}
merged_env_vars = {**additional_env_vars}
iteration_results = []
for iter in range(options.iterations):
print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True)
bench_results = benchmark.run(merged_env_vars)
for res in bench_results:
res.unit = benchmark.unit()
res.name = benchmark.name()
res.label += f" {extra_label}"
results.append(res)
if bench_results is not None:
print(f"complete ({bench_results.value} {bench_results.unit}).")
iteration_results.append(bench_results)
else:
print(f"did not finish.")

if len(iteration_results) == 0:
continue

iteration_results.sort(key=lambda res: res.value)
median_index = len(iteration_results) // 2
median_result = iteration_results[median_index]

median_result.unit = benchmark.unit()
median_result.name = benchmark.name()

results.append(median_result)


for benchmark in benchmarks:
print(f"tearing down {benchmark.name()}... ", end='', flush=True)
benchmark.teardown()
print("complete.")

chart_data = {"This PR" : results}

@@ -93,11 +111,18 @@ def validate_and_parse_env_args(env_args):
parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[])
parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.')
parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=["baseline"])
parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=5)
parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600)
parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None)

args = parser.parse_args()
additional_env_vars = validate_and_parse_env_args(args.env)

options.rebuild = not args.no_rebuild
options.sycl = args.sycl
options.iterations = args.iterations
options.timeout = args.timeout

benchmark_filter = re.compile(args.filter) if args.filter else None

main(args.benchmark_directory, additional_env_vars, args.save, args.compare)
main(args.benchmark_directory, additional_env_vars, args.save, args.compare, benchmark_filter)
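Note that `--filter` is compiled with `re.compile` and matched against each benchmark's human-readable name using `search`, so it behaves as a substring/regex match rather than an exact one. For example (benchmark names taken from this patch):

```python
import re

names = [
    "api_overhead_benchmark_sycl in order, mean execution time per 10 kernels",
    "Velocity-Bench Sobel Filter",
]

pattern = re.compile("Sobel|in order")   # e.g. passed via --filter "Sobel|in order"
print([name for name in names if pattern.search(name)])
# both names match: one via "in order", the other via "Sobel"
```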
35 changes: 18 additions & 17 deletions scripts/benchmarks/output.py
@@ -5,6 +5,7 @@

import collections
from benches.base import Result
import math

# Function to generate the mermaid bar chart script
def generate_mermaid_script(chart_data: dict[str, list[Result]]):
@@ -84,28 +85,28 @@ def generate_markdown_details(results: list[Result]):
return "\n".join(markdown_sections)

def generate_summary(chart_data: dict[str, list[Result]]) -> str:
# Calculate the mean value of "This PR" for each benchmark
this_pr_means = {}
# Calculate the geometric mean value of "This PR" for each benchmark
this_pr_geomeans = {}
for res in chart_data["This PR"]:
if res.name not in this_pr_means:
this_pr_means[res.name] = []
this_pr_means[res.name].append(res.value)
for bname in this_pr_means:
this_pr_means[bname] = sum(this_pr_means[bname]) / len(this_pr_means[bname])

# Calculate the percentage for each entry relative to "This PR"
if res.name not in this_pr_geomeans:
this_pr_geomeans[res.name] = []
this_pr_geomeans[res.name].append(res.value)
for bname in this_pr_geomeans:
product = math.prod(this_pr_geomeans[bname])
this_pr_geomeans[bname] = product ** (1 / len(this_pr_geomeans[bname]))

# Calculate the percentage for each entry relative to "This PR" using geometric mean
summary_data = {"This PR": 100}
for entry_name, results in chart_data.items():
if entry_name == "This PR":
continue
entry_sum = 0
for res in results:
if res.name in this_pr_means:
percentage = (res.value / this_pr_means[res.name]) * 100
entry_sum += percentage

entry_average = entry_sum / len(results) if results else 0
summary_data[entry_name] = entry_average
entry_product = math.prod([res.value for res in results if res.name in this_pr_geomeans])
entry_geomean = entry_product ** (1 / len(results)) if results else 0
if entry_geomean and this_pr_geomeans.get(results[0].name):
percentage = (entry_geomean / this_pr_geomeans[results[0].name]) * 100
else:
percentage = 0
summary_data[entry_name] = percentage

markdown_table = "| Name | Result % |\n| --- | --- |\n"
for entry_name, percentage in summary_data.items():
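The summary percentage is now derived from geometric means. A self-contained illustration of why the geometric mean suits this kind of normalized comparison better than the arithmetic mean (the timings below are made up):

```python
import math

def geomean(values):
    return math.prod(values) ** (1 / len(values))

baseline = [10.0, 100.0]   # "This PR" timings for two benchmarks
candidate = [20.0, 50.0]   # 2x slower on the first, 2x faster on the second

arithmetic = (sum(candidate) / len(candidate)) / (sum(baseline) / len(baseline)) * 100
geometric = geomean(candidate) / geomean(baseline) * 100

print(f"arithmetic: {arithmetic:.0f}%")  # ~64%, dominated by the larger benchmark
print(f"geometric:  {geometric:.0f}%")   # 100%, the two 2x swings cancel out
```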
4 changes: 1 addition & 3 deletions scripts/benchmarks/utils/utils.py
@@ -28,9 +28,7 @@ def run(command, env_vars={}, cwd=None, add_sycl=False):
env['LD_LIBRARY_PATH'] = sycl_lib_path + os.pathsep + env.get('LD_LIBRARY_PATH', '')

env.update(env_vars)
result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) # nosec B603
print(result.stdout.decode())
print(result.stderr.decode())
result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, timeout=options.timeout) # nosec B603
return result
except subprocess.CalledProcessError as e:
print(e.stdout.decode())
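With `timeout=options.timeout`, a hung benchmark now makes `subprocess.run` raise `subprocess.TimeoutExpired` after killing the child process, instead of blocking the whole run. A minimal sketch of what that looks like to a caller (the handling shown here is an assumption, not part of this patch):

```python
import subprocess

try:
    subprocess.run(["sleep", "5"], capture_output=True, check=True, timeout=1)
except subprocess.TimeoutExpired as e:
    # run() kills and waits for the child once the timeout elapses, then re-raises.
    print(f"benchmark timed out after {e.timeout}s: {e.cmd}")
```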
