Add hooks for calling arbitrary code around each benchmark (#193)
* Add hooks for calling arbitrary code around each benchmark

* Add setuptools as a dependency

* Use importlib.metadata instead

* Add testing

* Add next version

* Support older Python versions

* Support older Pythons

* Support newer Pythons

* Fix arg parsing

* Fix hook check

* Add active hooks to metadata

* Only load hooks actually in use

* Fix tests
mdboom committed Jul 28, 2024
1 parent 787f53e commit c6c33d9
Showing 8 changed files with 153 additions and 26 deletions.
4 changes: 3 additions & 1 deletion doc/run_benchmark.rst
@@ -195,9 +195,11 @@ Specializer statistics (``pystats``)
==================================

``pyperf`` has built-in support for `specializer statistics (``pystats``) <https://docs.python.org/dev/using/configure.html#cmdoption-enable-pystats>`_.
If running benchmarks on a CPython built with the ``--enable-pystats`` flag, pyperf will automatically collect ``pystats`` on the benchmark code by calling ``sys._stats_on`` immediately before the benchmark and calling ``sys._stats_off`` immediately after.
If running benchmarks on a CPython built with the ``--enable-pystats`` flag, when you pass ``--hook pystats``, pyperf will collect ``pystats`` on the benchmark code by calling ``sys._stats_on`` immediately before the benchmark and calling ``sys._stats_off`` immediately after.
Stats are not collected when running ``pyperf``'s own code or when warming up or calibrating the benchmarks.

**New in 2.8.0:** The ``--hook pystats`` flag must be given to collect pystats.

Due to the overhead of collecting the statistics, the timing results will be meaningless.

The `Tools/scripts/summarize_stats.py <https://github.com/python/cpython/blob/main/Tools/scripts/summarize_stats.py>`_ script can be used to summarize the statistics in a human-readable form.
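For example, on a ``--enable-pystats`` build the hook is enabled from the command line (a sketch; the benchmark statement and output file name are only illustrative):

    python -m pyperf timeit --hook pystats "sorted(range(1000))" -o results.json

Any other ``pyperf`` options can be combined with ``--hook``; the collected statistics can then be summarized with the ``summarize_stats.py`` script mentioned above.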
4 changes: 0 additions & 4 deletions pyperf/_collect_metadata.py
@@ -125,10 +125,6 @@ def collect_python_metadata(metadata):
    if not gc.isenabled():
        metadata['python_gc'] = 'disabled'

    # pystats enabled?
    if hasattr(sys, "_stats_clear"):
        metadata['pystats'] = 'enabled'


def read_proc(path):
path = proc_path(path)
97 changes: 97 additions & 0 deletions pyperf/_hooks.py
@@ -0,0 +1,97 @@
# Hooks are installable context managers defined as entry points so that
# arbitrary code can be run right before and after the actual internal
# benchmarking code is run.


import abc
import importlib.metadata
import sys


def get_hooks():
    hook_prefix = "pyperf.hook"
    entry_points = importlib.metadata.entry_points()
    if sys.version_info[:2] < (3, 10):
        group = entry_points[hook_prefix]
    else:
        group = entry_points.select(group=hook_prefix)
    return group


def get_hook_names():
    return (x.name for x in get_hooks())


def get_selected_hooks(hook_names):
    if hook_names is None:
        return

    hook_mapping = {hook.name: hook for hook in get_hooks()}
    for hook_name in hook_names:
        yield hook_mapping[hook_name]
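As an illustration (a minimal sketch, not part of the commit; the hook names assume the entry points declared in pyproject.toml below), the discovery helpers can be exercised like this:

    from pyperf._hooks import get_hook_names, get_selected_hooks

    print(sorted(get_hook_names()))      # ['_test_hook', 'pystats']
    for entry_point in get_selected_hooks(["pystats"]):
        hook_class = entry_point.load()  # the pyperf._hooks.pystats class
        # hook_class() raises HookError on a build without --enable-pystats

Only the hooks actually named on the command line are loaded, which is why get_selected_hooks() yields entry points rather than instantiating every registered hook.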


class HookError(Exception):
    pass


class HookBase(abc.ABC):
    def __init__(self):
        """
        Create a new instance of the hook.
        """
        pass

    def teardown(self, _metadata):
        """
        Called when the hook is completed for a process. May add any information
        collected to the passed-in `metadata` dictionary.
        """
        pass

    def __enter__(self):
        """
        Called immediately before running benchmark code.
        May be called multiple times per instance.
        """
        pass

    def __exit__(self, _exc_type, _exc_value, _traceback):
        """
        Called immediately after running benchmark code.
        """
        pass


class _test_hook(HookBase):
    def __init__(self):
        self._count = 0

    def teardown(self, metadata):
        metadata["_test_hook"] = self._count

    def __enter__(self):
        self._count += 1

    def __exit__(self, _exc_type, _exc_value, _traceback):
        pass


class pystats(HookBase):
    def __init__(self):
        if not hasattr(sys, "_stats_on"):
            raise HookError(
                "Can not collect pystats because python was not built with --enable-pystats"
            )
        sys._stats_off()
        sys._stats_clear()

    def teardown(self, metadata):
        metadata["pystats"] = "enabled"

    def __enter__(self):
        sys._stats_on()

    def __exit__(self, _exc_type, _exc_value, _traceback):
        sys._stats_off()
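Third-party packages can ship hooks of their own by subclassing HookBase and registering the class under the "pyperf.hook" entry-point group, just as pyperf does for its built-in hooks in pyproject.toml below. A hypothetical sketch (module and hook names invented for illustration, not part of pyperf):

    # my_pyperf_hooks.py -- hypothetical third-party module
    import time

    from pyperf._hooks import HookBase


    class wall_clock(HookBase):
        def __init__(self):
            self._total = 0.0
            self._started = None

        def __enter__(self):
            self._started = time.perf_counter()

        def __exit__(self, _exc_type, _exc_value, _traceback):
            self._total += time.perf_counter() - self._started

        def teardown(self, metadata):
            # Called once per worker process, after all values are measured.
            metadata["wall_clock_hook_seconds"] = self._total

With an entry point such as wall_clock = "my_pyperf_hooks:wall_clock" in that package's own pyproject.toml, the hook would show up among --hook's choices and could be selected like any built-in hook.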
4 changes: 4 additions & 0 deletions pyperf/_manager.py
@@ -69,6 +69,10 @@ def worker_cmd(self, calibrate_loops, calibrate_warmups, wpipe):
        if args.profile:
            cmd.extend(['--profile', args.profile])

        if args.hook:
            for hook in args.hook:
                cmd.extend(['--hook', hook])

        if self.runner._add_cmdline_args:
            self.runner._add_cmdline_args(cmd, args)

19 changes: 12 additions & 7 deletions pyperf/_runner.py
@@ -11,6 +11,7 @@
                               get_isolated_cpus, set_cpu_affinity,
                               set_highest_priority)
from pyperf._formatter import format_timedelta
from pyperf._hooks import get_hook_names
from pyperf._utils import (MS_WINDOWS, MAC_OS, abs_executable,
                           WritePipe, get_python_names,
                           merge_profile_stats)
@@ -77,13 +78,6 @@ def __init__(self, values=None, processes=None,
                 program_args=None, add_cmdline_args=None,
                 _argparser=None, warmups=1):

        # Reset the stats collection if running a --enable-pystats build
        try:
            sys._stats_off()
            sys._stats_clear()
        except AttributeError:
            pass

        # Watchdog: ensure that only one instance of Runner (or a Runner
        # subclass) is created per process to prevent bad surprises
        cls = self.__class__
@@ -248,6 +242,13 @@ def __init__(self, values=None, processes=None,
                            help='Collect profile data using cProfile '
                                 'and output to the given file.')

        hook_names = list(get_hook_names())
        parser.add_argument(
            '--hook', action="append", choices=hook_names,
            metavar=f"{', '.join(x for x in hook_names if not x.startswith('_'))}",
            help='Use the given pyperf hooks'
        )

        memory = parser.add_mutually_exclusive_group()
        memory.add_argument('--tracemalloc', action="store_true",
                            help='Trace memory allocations using tracemalloc')
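Because the new --hook option uses action="append", it can be repeated, and args.hook is either None (the default relied on by the "if args.hook:" checks elsewhere in this commit) or a list of hook names. A standalone sketch of that argparse behaviour (not pyperf code; the choices list is hard-coded here for illustration):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--hook', action="append", choices=['pystats', '_test_hook'])

    print(parser.parse_args([]).hook)     # None
    print(parser.parse_args(['--hook', 'pystats', '--hook', '_test_hook']).hook)
    # ['pystats', '_test_hook']

The metavar hides the leading-underscore test hook from the --help output while still accepting it as a valid choice.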
@@ -732,6 +733,10 @@ def bench_command(self, name, command):
        if self.args.profile:
            command.extend(["--profile", self.args.profile])

        if self.args.hook:
            for hook in self.args.hook:
                command.extend(["--hook", hook])

        # Use lazy import to limit imports on 'import pyperf'
        from pyperf._command import BenchCommandTask
        task = BenchCommandTask(self, name, command)
34 changes: 20 additions & 14 deletions pyperf/_worker.py
@@ -1,10 +1,12 @@
import contextlib
import statistics
import sys
import time

import pyperf
from pyperf._formatter import (format_number, format_value, format_values,
                               format_timedelta)
from pyperf._hooks import get_selected_hooks, HookError
from pyperf._utils import MS_WINDOWS, MAC_OS, percentile, median_abs_dev


@@ -58,19 +60,16 @@ def _compute_values(self, values, nvalue,

        task_func = self.task_func

        # If we are on a pystats build, turn on stats collection around the
        # actual work, except when calibrating.
        if hasattr(sys, "_stats_on") and not calibrate_loops:
            core_task_func = task_func

            def stats_func(*args):
                sys._stats_on()
                try:
                    return core_task_func(*args)
                finally:
                    sys._stats_off()

            task_func = stats_func
        hook_managers = {}
        for hook in get_selected_hooks(args.hook):
            try:
                hook_managers[hook.name] = hook.load()()
            except HookError as e:
                print(f"ERROR setting up hook '{hook.name}':", file=sys.stderr)
                print(str(e), file=sys.stderr)
                sys.exit(1)
        if len(hook_managers):
            self.metadata["hooks"] = ", ".join(hook_managers.keys())

index = 1
inner_loops = self.inner_loops
@@ -80,7 +79,11 @@ def stats_func(*args):
            if index > nvalue:
                break

            raw_value = task_func(self, self.loops)
            with contextlib.ExitStack() as stack:
                for hook in hook_managers.values():
                    stack.enter_context(hook)
                raw_value = task_func(self, self.loops)

            raw_value = float(raw_value)
            value = raw_value / (self.loops * inner_loops)

@@ -116,6 +119,9 @@

            index += 1

        for hook in hook_managers.values():
            hook.teardown(self.metadata)

    def collect_metadata(self):
        from pyperf._collect_metadata import collect_metadata
        return collect_metadata(process=False)
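Taken together, each selected hook follows a simple per-worker lifecycle: constructed once, entered and exited around every measured value, then a single teardown that can record metadata. A condensed sketch of that flow using the built-in _test_hook (simplified; the real worker wraps task_func and handles calibration as shown above):

    import contextlib

    from pyperf._hooks import _test_hook

    hook = _test_hook()          # 1. constructed once per worker process
    metadata = {}

    for _ in range(3):           # 2. entered/exited around every measured value
        with contextlib.ExitStack() as stack:
            stack.enter_context(hook)
            pass                 # ... the benchmark's task_func would run here

    hook.teardown(metadata)      # 3. teardown once, after all values
    print(metadata)              # {'_test_hook': 3}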
13 changes: 13 additions & 0 deletions pyperf/tests/test_perf_cli.py
@@ -719,6 +719,19 @@ def test_command_track_memory(self):

        self._check_track_memory_bench(bench, loops=2)

    def test_hook(self):
        with tests.temporary_file() as tmp_name:
            self.run_command('timeit',
                             '--hook',
                             '_test_hook',
                             '-p2', '-w1', '-l5', '-n3',
                             '[1,2]*1000',
                             '-o', tmp_name)
            bench = pyperf.Benchmark.load(tmp_name)
            metadata = bench.get_metadata()
            assert metadata.get("_test_hook", 0) > 0
            assert metadata.get("hooks", None) == "_test_hook"


class TestConvert(BaseTestCase, unittest.TestCase):
    def test_stdout(self):
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -56,6 +56,10 @@ dev = [
[project.scripts]
pyperf = "pyperf.__main__:main"

[project.entry-points."pyperf.hook"]
pystats = "pyperf._hooks:pystats"
_test_hook = "pyperf._hooks:_test_hook"

[tool.setuptools]
packages = ["pyperf", "pyperf.tests"]
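As a quick sanity check (a sketch assuming Python 3.10+ and that pyperf is installed so its metadata is visible), the registration above is discoverable through importlib.metadata, which is exactly what pyperf._hooks.get_hooks() relies on:

    import importlib.metadata

    hooks = importlib.metadata.entry_points().select(group="pyperf.hook")
    print(sorted(ep.name for ep in hooks))   # ['_test_hook', 'pystats']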

