From 9f060d513403ed9aec54c48170b0885cde3065c7 Mon Sep 17 00:00:00 2001
From: Philippe Pepiot
Date: Fri, 24 Nov 2017 14:35:57 +0100
Subject: [PATCH] Allow filtering which parameter combinations run via --bench

Extend the --bench option to also match against
benchmark_name('param1', 'param2', ...) when the benchmark is
parameterized. Skipped parameter combinations are displayed as "n/a",
as for skipped benchmarks, and recorded with NaN as their result. In
that case, any previously existing result is kept in the result JSON
file.
---
 asv/benchmarks.py                 | 36 ++++++++++++++++++++++++++++++++----
 asv/commands/profiling.py         |  4 +++-
 asv/results.py                    | 19 ++++++++++++++++++-
 test/benchmark/params_examples.py |  8 ++++++++
 test/test_benchmarks.py           | 18 ++++++++++++++++--
 test/test_workflow.py             | 28 ++++++++++++++++++++++++++++
 6 files changed, 105 insertions(+), 8 deletions(-)

diff --git a/asv/benchmarks.py b/asv/benchmarks.py
index 59b353960..f7eb7616d 100644
--- a/asv/benchmarks.py
+++ b/asv/benchmarks.py
@@ -33,7 +33,7 @@
 
 
 def run_benchmark(benchmark, root, env, show_stderr=False,
-                  quick=False, profile=False, cwd=None):
+                  quick=False, profile=False, cwd=None, selected_idx=None):
     """
     Run a benchmark in different process in the given environment.
 
@@ -60,6 +60,10 @@ def run_benchmark(benchmark, root, env, show_stderr=False,
         The path to the current working directory to use when running
         the benchmark process.
 
+    selected_idx : list of int, optional
+        Indices of the parameter combinations to run the benchmark on.
+        By default, all combinations are run.
+
     Returns
     -------
     result : dict
@@ -120,6 +124,13 @@ def log_result(msg):
     result['started_at'] = datetime.datetime.utcnow()
 
     for param_idx, params in param_iter:
+        if (selected_idx is not None and benchmark['params']
+            and param_idx not in selected_idx):
+            # Use NaN to mark the result as skipped
+            bench_results.append(dict(samples=None, number=None,
+                                      result=float('nan'), stats=None))
+            bench_profiles.append(None)
+            continue
         success, data, profile_data, err, out, errcode = \
             _run_benchmark_single(
                 benchmark, root, env, param_idx,
@@ -326,6 +337,9 @@ def __init__(self, conf, benchmarks, regex=None):
         `regex` is a list of regular expressions matching the
         benchmarks to run. If none are provided, all benchmarks are
         run.
+        For parameterized benchmarks, the regex is matched against
+        `funcname(param0, param1, ...)` so that the parameter
+        combination takes part in the filtering.
""" self._conf = conf self._benchmark_dir = conf.benchmark_dir @@ -336,10 +350,23 @@ def __init__(self, conf, benchmarks, regex=None): regex = [regex] self._all_benchmarks = {} + self._benchmark_selection = {} for benchmark in benchmarks: self._all_benchmarks[benchmark['name']] = benchmark - if not regex or any(re.search(reg, benchmark['name']) for reg in regex): - self[benchmark['name']] = benchmark + if benchmark['params']: + self._benchmark_selection[benchmark['name']] = [] + for idx, param_set in enumerate( + itertools.product(*benchmark['params'])): + name = '%s(%s)' % ( + benchmark['name'], + ', '.join(param_set)) + if not regex or any(re.search(reg, name) for reg in regex): + self[benchmark['name']] = benchmark + self._benchmark_selection[benchmark['name']].append(idx) + else: + self._benchmark_selection[benchmark['name']] = None + if not regex or any(re.search(reg, benchmark['name']) for reg in regex): + self[benchmark['name']] = benchmark @classmethod def discover(cls, conf, repo, environments, commit_hash, regex=None): @@ -616,7 +643,8 @@ def run_benchmarks(self, env, show_stderr=False, quick=False, profile=False, benchmark, self._benchmark_dir, env, show_stderr=show_stderr, quick=quick, profile=profile, - cwd=tmpdir) + cwd=tmpdir, + selected_idx=self._benchmark_selection[benchmark['name']]) finally: shutil.rmtree(tmpdir, True) diff --git a/asv/commands/profiling.py b/asv/commands/profiling.py index fee0ec31d..bdba84674 100644 --- a/asv/commands/profiling.py +++ b/asv/commands/profiling.py @@ -49,7 +49,9 @@ def setup_arguments(cls, subparsers): parser.add_argument( 'benchmark', help="""The benchmark to profile. Must be a - fully-specified benchmark name.""") + fully-specified benchmark name. For parameterized benchmark, it + must include the parameter combination to use, e.g.: + benchmark_name(param0, param1, ...)""") parser.add_argument( 'revision', nargs='?', help="""The revision of the project to profile. May be a diff --git a/asv/results.py b/asv/results.py index 55c99a156..6b7ce10c8 100644 --- a/asv/results.py +++ b/asv/results.py @@ -564,7 +564,7 @@ def add_existing_results(self, old): Add any existing old results that aren't overridden by the current results. """ - for dict_name in ('_results', '_samples', '_number', '_stats', + for dict_name in ('_samples', '_number', '_stats', '_benchmark_params', '_profiles', '_started_at', '_ended_at', '_benchmark_version'): old_dict = getattr(old, dict_name) @@ -572,6 +572,23 @@ def add_existing_results(self, old): for key, val in six.iteritems(old_dict): if key not in new_dict: new_dict[key] = val + new_results = self._results + old_results = old._results + for key, val in six.iteritems(old_results): + if key not in new_results: + new_results[key] = val + elif self._benchmark_params[key]: + old_benchmark_results = {} + for idx, param_set in enumerate(itertools.product( + *old._benchmark_params[key])): + old_benchmark_results[param_set] = val[idx] + for idx, param_set in enumerate(itertools.product( + *self._benchmark_params[key])): + # when new result is skipped (NaN), keep previous result. 
+                    if (util.is_nan(new_results[key][idx]) and
+                            old_benchmark_results.get(param_set) is not None):
+                        new_results[key][idx] = (
+                            old_benchmark_results[param_set])
 
     def rm(self, result_dir):
         path = os.path.join(result_dir, self._filename)
diff --git a/test/benchmark/params_examples.py b/test/benchmark/params_examples.py
index cd0db6367..50eacdbd5 100644
--- a/test/benchmark/params_examples.py
+++ b/test/benchmark/params_examples.py
@@ -80,3 +80,11 @@ def track_find_test(n):
     return asv_test_repo.dummy_value[n - 1]
 
 track_find_test.params = [1, 2]
+
+
+def track_param_selection(a, b):
+    return a + b
+
+
+track_param_selection.param_names = ['a', 'b']
+track_param_selection.params = [[1, 2], [3, 5]]
diff --git a/test/test_benchmarks.py b/test/test_benchmarks.py
index 3e30fec41..f5dac5005 100644
--- a/test/test_benchmarks.py
+++ b/test/test_benchmarks.py
@@ -65,7 +65,7 @@ def test_find_benchmarks(tmpdir):
 
     b = benchmarks.Benchmarks.discover(conf, repo, envs, [commit_hash],
                                        regex='example')
-    assert len(b) == 25
+    assert len(b) == 26
 
     b = benchmarks.Benchmarks.discover(conf, repo, envs, [commit_hash],
                                        regex='time_example_benchmark_1')
@@ -83,8 +83,22 @@ def test_find_benchmarks(tmpdir):
     assert b['custom.time_function']['pretty_name'] == 'My Custom Function'
     assert b['named.track_custom_pretty_name']['pretty_name'] == 'this.is/the.answer'
 
+    # benchmark param selection with regex
+    b = benchmarks.Benchmarks.discover(conf, repo, envs, [commit_hash],
+                                       regex='track_param_selection\(.*, 3\)')
+    assert list(b.keys()) == ['params_examples.track_param_selection']
+    assert b._benchmark_selection['params_examples.track_param_selection'] == [0, 2]
+    b = benchmarks.Benchmarks.discover(conf, repo, envs, [commit_hash],
+                                       regex='track_param_selection\(1, ')
+    assert list(b.keys()) == ['params_examples.track_param_selection']
+    assert b._benchmark_selection['params_examples.track_param_selection'] == [0, 1]
+    b = benchmarks.Benchmarks.discover(conf, repo, envs, [commit_hash],
+                                       regex='track_param_selection')
+    assert list(b.keys()) == ['params_examples.track_param_selection']
+    assert b._benchmark_selection['params_examples.track_param_selection'] == [0, 1, 2, 3]
+
     b = benchmarks.Benchmarks.discover(conf, repo, envs, [commit_hash])
-    assert len(b) == 35
+    assert len(b) == 36
 
     assert 'named.OtherSuite.track_some_func' in b
 
diff --git a/test/test_workflow.py b/test/test_workflow.py
index b0bc7b133..2959f0055 100644
--- a/test/test_workflow.py
+++ b/test/test_workflow.py
@@ -348,3 +348,31 @@ def test_run_with_repo_subdir(basic_conf_with_subdir):
     data = util.load_json(fn_results)
     assert data['results'][bench_name] == {'params': [['1', '2']],
                                            'result': [6, 6]}
+
+
+def test_benchmark_param_selection(basic_conf):
+    tmpdir, local, conf, machine_file = basic_conf
+    conf.matrix = {}
+    tools.generate_test_repo(tmpdir, values=[(1, 2, 3)])
+    tools.run_asv_with_conf(conf, 'run', 'master^!',
+                            '--quick', '--show-stderr',
+                            '--bench', 'track_param_selection\(.*, 3\)',
+                            _machine_file=machine_file)
+
+    def get_results():
+        results = util.load_json(glob.glob(join(
+            tmpdir, 'results_workflow', 'orangutan', '*-*.json'))[0])
+        # replacing NaN by 'n/a' makes the assertions easier to write
+        return ['n/a' if util.is_nan(item) else item
+                for item in results['results'][
+                    'params_examples.track_param_selection']['result']]
+
+    assert get_results() == [4, 'n/a', 5, 'n/a']
+    tools.run_asv_with_conf(conf, 'run', '--show-stderr',
+                            '--bench', 'track_param_selection\(1, ',
+                            _machine_file=machine_file)
+    assert get_results() == [4, 6, 5, 'n/a']
+    tools.run_asv_with_conf(conf, 'run', '--show-stderr',
+                            '--bench', 'track_param_selection',
+                            _machine_file=machine_file)
+    assert get_results() == [4, 6, 5, 7]
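
For reference, the two mechanisms introduced above can be sketched as a
small standalone script. This is a minimal illustration, not asv API:
select_param_combinations and old_by_set are hypothetical names, the
benchmark dict mirrors the params_examples.track_param_selection fixture
from this patch, and params are assumed to be stored as lists of repr'd
strings, as asv stores them.

    import itertools
    import math
    import re


    def select_param_combinations(benchmark, patterns):
        # Mirrors the Benchmarks.__init__ loop: render each combination
        # as "name(param0, param1, ...)" and keep the indices whose
        # rendered name matches any of the given regexes.
        selected = []
        for idx, param_set in enumerate(
                itertools.product(*benchmark['params'])):
            name = '%s(%s)' % (benchmark['name'], ', '.join(param_set))
            if not patterns or any(re.search(p, name) for p in patterns):
                selected.append(idx)
        return selected


    bench = {'name': 'params_examples.track_param_selection',
             'params': [['1', '2'], ['3', '5']]}

    # itertools.product orders combinations (1, 3), (1, 5), (2, 3), (2, 5)
    assert select_param_combinations(
        bench, [r'track_param_selection\(.*, 3\)']) == [0, 2]
    assert select_param_combinations(
        bench, [r'track_param_selection\(1, ']) == [0, 1]
    assert select_param_combinations(bench, []) == [0, 1, 2, 3]

    # Mirrors the merge rule in Results.add_existing_results: a skipped
    # (NaN) slot in the new results keeps the old result for the same
    # parameter set, so successive filtered runs fill in the result list.
    old_result = [4, 6, 5, float('nan')]
    new_result = [4, float('nan'), 5, float('nan')]
    old_by_set = dict(zip(itertools.product(*bench['params']), old_result))
    for idx, param_set in enumerate(itertools.product(*bench['params'])):
        if (math.isnan(new_result[idx]) and
                old_by_set.get(param_set) is not None):
            new_result[idx] = old_by_set[param_set]
    assert new_result[:3] == [4, 6, 5]  # index 1 recovered from the old run
    assert math.isnan(new_result[3])    # never measured in either run

On the command line, this selection corresponds to invocations such as
asv run --bench 'track_param_selection\(.*, 3\)', as exercised by
test_benchmark_param_selection above.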