From 5cc6737c0d1e5a3d9b0c258874cfbf959dc4205e Mon Sep 17 00:00:00 2001
From: Martin Yeo <40734014+trexfeathers@users.noreply.github.com>
Date: Fri, 30 Aug 2024 14:10:26 +0100
Subject: [PATCH] Repeating Tracemalloc Benchmark for accuracy (#5981)

* PoC custom benchmark.

* Working tracemalloc subclass.

* Docstrings and comments.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Finalised code.

* Make custom benchmarks installable.

* Experiment.

* Re-write following testing.

* Replace pyproject with setup.py.

* Testing.

* Make testing smaller.

* Better testing.

* Better testing 2.

* Better testing 3.

* Better testing 4.

* Better testing 5.

* Remove testing.

* Remove tracemalloc decorator.

* Docs tidy-up.

* Restructure and use custom install script.

* Temporary quick demo.

* Revert "Temporary quick demo."

This reverts commit 9dffa1bec548d8a73379ca62ab5b76f2e4fb3c2e.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 benchmarks/README.md                          |   6 +
 benchmarks/asv.conf.json                      |   7 +-
 benchmarks/benchmarks/__init__.py             | 105 ----------
 benchmarks/benchmarks/cperf/save.py           |   5 +-
 benchmarks/benchmarks/merge_concat.py         |  11 +-
 .../benchmarks/mesh/utils/regions_combine.py  |  13 +-
 benchmarks/benchmarks/regridding.py           |  17 +-
 benchmarks/benchmarks/save.py                 |   4 +-
 .../benchmarks/sperf/combine_regions.py       |  14 +-
 benchmarks/benchmarks/sperf/save.py           |   5 +-
 benchmarks/benchmarks/stats.py                |  12 +-
 benchmarks/benchmarks/trajectory.py           |  12 +-
 benchmarks/custom_bms/README.md               |  11 +
 benchmarks/custom_bms/install.py              |  55 +++++
 benchmarks/custom_bms/tracemallocbench.py     | 196 ++++++++++++++++++
 docs/src/whatsnew/latest.rst                  |   4 +-
 16 files changed, 317 insertions(+), 160 deletions(-)
 create mode 100644 benchmarks/custom_bms/README.md
 create mode 100644 benchmarks/custom_bms/install.py
 create mode 100644 benchmarks/custom_bms/tracemallocbench.py

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 49168e7281..911d5f7833 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -118,6 +118,12 @@ repeats _between_ `setup()` calls using the `repeat` attribute.
 `warmup_time = 0` is also advisable since ASV performs independent re-runs to
 estimate run-time, and these will still be subject to the original problem.
 
+### Custom benchmarks
+
+Iris benchmarking implements custom benchmark types, such as a `tracemalloc`
+benchmark to measure memory growth. See [custom_bms/](./custom_bms) for more
+detail.
+
 ### Scaling / non-Scaling Performance Differences
 
 **(We no longer advocate the below for benchmarks run during CI, given the
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 13e7256b83..2857c90ad7 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -53,9 +53,12 @@
     "command_comment": [
         "We know that the Nox command takes care of installation in each",
        "environment, and in the case of Iris no specialised uninstall or",
-        "build commands are needed to get it working."
+        "build commands are needed to get it working.",
+
+        "We do however need to install the custom benchmarks for them to be",
+        "usable."
     ],
     "install_command": [],
     "uninstall_command": [],
-    "build_command": []
+    "build_command": ["python {conf_dir}/custom_bms/install.py"]
 }
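(Aside, not part of the patch: the `build_command` entry above is what gets the custom benchmarks installed. ASV substitutes `{conf_dir}` with the directory containing `asv.conf.json` and runs the command when building each benchmark environment. A rough Python sketch of that behaviour follows; the checkout path is hypothetical, and ASV's real build step also handles caching and wheel building.)

    # Illustrative sketch only: approximates what ASV does with the
    # "build_command" entry above; not ASV's actual implementation.
    import subprocess

    conf_dir = "/path/to/iris/benchmarks"  # hypothetical asv.conf.json location
    command = f"python {conf_dir}/custom_bms/install.py"
    subprocess.run(command.split(), check=True)  # runs in the benchmark env
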
], "install_command": [], "uninstall_command": [], - "build_command": [] + "build_command": ["python {conf_dir}/custom_bms/install.py"] } diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 378c26332d..30a991a879 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -37,111 +37,6 @@ def disable_repeat_between_setup(benchmark_object): return benchmark_object -class TrackAddedMemoryAllocation: - """Measures by how much process resident memory grew, during execution. - - Context manager which measures by how much process resident memory grew, - during execution of its enclosed code block. - - Obviously limited as to what it actually measures : Relies on the current - process not having significant unused (de-allocated) memory when the - tested codeblock runs, and only reliable when the code allocates a - significant amount of new memory. - - Example: - with TrackAddedMemoryAllocation() as mb: - initial_call() - other_call() - result = mb.addedmem_mb() - - Attributes - ---------- - RESULT_MINIMUM_MB : float - The smallest result that should ever be returned, in Mb. Results - fluctuate from run to run (usually within 1Mb) so if a result is - sufficiently small this noise will produce a before-after ratio over - AVD's detection threshold and be treated as 'signal'. Results - smaller than this value will therefore be returned as equal to this - value, ensuring fractionally small noise / no noise at all. - Defaults to 1.0 - - RESULT_ROUND_DP : int - Number of decimal places of rounding on result values (in Mb). - Defaults to 1 - - """ - - RESULT_MINIMUM_MB = 0.2 - RESULT_ROUND_DP = 1 # I.E. to nearest 0.1 Mb - - def __enter__(self): - tracemalloc.start() - return self - - def __exit__(self, *_): - _, peak_mem_bytes = tracemalloc.get_traced_memory() - tracemalloc.stop() - # Save peak-memory allocation, scaled from bytes to Mb. - self._peak_mb = peak_mem_bytes * (2.0**-20) - - def addedmem_mb(self): - """Return measured memory growth, in Mb.""" - result = self._peak_mb - # Small results are too vulnerable to noise being interpreted as signal. - result = max(self.RESULT_MINIMUM_MB, result) - # Rounding makes results easier to read. - result = np.round(result, self.RESULT_ROUND_DP) - return result - - @staticmethod - def decorator(decorated_func): - """Benchmark to track growth in resident memory during execution. - - Intended for use on ASV ``track_`` benchmarks. Applies the - :class:`TrackAddedMemoryAllocation` context manager to the benchmark - code, sets the benchmark ``unit`` attribute to ``Mb``. - - """ - - def _wrapper(*args, **kwargs): - assert decorated_func.__name__[:6] == "track_" - # Run the decorated benchmark within the added memory context - # manager. - with TrackAddedMemoryAllocation() as mb: - decorated_func(*args, **kwargs) - return mb.addedmem_mb() - - decorated_func.unit = "Mb" - return _wrapper - - @staticmethod - def decorator_repeating(repeats=3): - """Benchmark to track growth in resident memory during execution. - - Tracks memory for repeated calls of decorated function. - - Intended for use on ASV ``track_`` benchmarks. Applies the - :class:`TrackAddedMemoryAllocation` context manager to the benchmark - code, sets the benchmark ``unit`` attribute to ``Mb``. - - """ - - def decorator(decorated_func): - def _wrapper(*args, **kwargs): - assert decorated_func.__name__[:6] == "track_" - # Run the decorated benchmark within the added memory context - # manager. 
-                with TrackAddedMemoryAllocation() as mb:
-                    for _ in range(repeats):
-                        decorated_func(*args, **kwargs)
-                return mb.addedmem_mb()
-
-            decorated_func.unit = "Mb"
-            return _wrapper
-
-        return decorator
-
-
 def on_demand_benchmark(benchmark_object):
     """Disable these benchmark(s) unless ON_DEMAND_BENCHARKS env var is set.
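(Aside, not part of the patch: the migration pattern applied throughout the files below, as a hypothetical minimal benchmark. The old decorated `track_` method becomes a plain `tracemalloc_`-prefixed method, with the repeat count moved into the standard ASV `number` attribute; `np.zeros` stands in for any memory-allocating operation.)

    # Hypothetical migration example. Old style, removed above:
    #
    #     @TrackAddedMemoryAllocation.decorator_repeating()
    #     def track_mem_allocate(self):
    #         _ = np.zeros(self.n)
    #
    # New style: the tracemalloc_ name prefix selects the custom benchmark
    # type added by this patch, and ``number`` requests 3 calls per repeat.
    import numpy as np

    class ExampleBenchmark:
        def setup(self):
            self.n = 10_000_000

        def tracemalloc_allocate(self):
            _ = np.zeros(self.n)

        tracemalloc_allocate.number = 3  # type: ignore[attr-defined]
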
diff --git a/benchmarks/benchmarks/cperf/save.py b/benchmarks/benchmarks/cperf/save.py
index 2d60f920c4..6dcd0b3bcf 100644
--- a/benchmarks/benchmarks/cperf/save.py
+++ b/benchmarks/benchmarks/cperf/save.py
@@ -6,7 +6,7 @@
 
 from iris import save
 
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
 from ..generate_data.ugrid import make_cube_like_2d_cubesphere, make_cube_like_umfield
 from . import _N_CUBESPHERE_UM_EQUIVALENT, _UM_DIMS_YX
 
@@ -36,6 +36,5 @@ def _save_data(self, cube):
     def time_save_data_netcdf(self, data_type):
         self._save_data(self.cube)
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save_data_netcdf(self, data_type):
+    def tracemalloc_save_data_netcdf(self, data_type):
         self._save_data(self.cube)
diff --git a/benchmarks/benchmarks/merge_concat.py b/benchmarks/benchmarks/merge_concat.py
index 1a18f92ce9..0bb4096e6c 100644
--- a/benchmarks/benchmarks/merge_concat.py
+++ b/benchmarks/benchmarks/merge_concat.py
@@ -8,7 +8,6 @@
 
 from iris.cube import CubeList
 
-from . import TrackAddedMemoryAllocation
 from .generate_data.stock import realistic_4d_w_everything
 
 
@@ -34,10 +33,11 @@ def setup(self):
     def time_merge(self):
         _ = self.cube_list.merge_cube()
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_merge(self):
+    def tracemalloc_merge(self):
         _ = self.cube_list.merge_cube()
 
+    tracemalloc_merge.number = 3  # type: ignore[attr-defined]
+
 
 class Concatenate:
     # TODO: Improve coverage.
@@ -56,6 +56,7 @@ def setup(self):
     def time_concatenate(self):
         _ = self.cube_list.concatenate_cube()
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_merge(self):
+    def tracemalloc_concatenate(self):
         _ = self.cube_list.concatenate_cube()
+
+    tracemalloc_concatenate.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/mesh/utils/regions_combine.py b/benchmarks/benchmarks/mesh/utils/regions_combine.py
index 1a1a43a622..a61deea56d 100644
--- a/benchmarks/benchmarks/mesh/utils/regions_combine.py
+++ b/benchmarks/benchmarks/mesh/utils/regions_combine.py
@@ -17,7 +17,6 @@
 from iris import load, load_cube, save
 from iris.mesh.utils import recombine_submeshes
 
-from ... import TrackAddedMemoryAllocation
 from ...generate_data.ugrid import make_cube_like_2d_cubesphere
 
 
@@ -169,8 +168,7 @@ def setup(self, n_cubesphere):
     def time_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_create_combined_cube(self, n_cubesphere):
+    def tracemalloc_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
 
@@ -180,8 +178,7 @@ class CombineRegionsComputeRealData(MixinCombineRegions):
     def time_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_compute_data(self, n_cubesphere):
+    def tracemalloc_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
 
@@ -199,8 +196,7 @@ def time_save(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         save(self.recombined_cube, "tmp.nc")
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save(self, n_cubesphere):
+    def tracemalloc_save(self, n_cubesphere):
         save(self.recombined_cube, "tmp.nc")
 
     def track_filesize_saved(self, n_cubesphere):
@@ -227,6 +223,5 @@ def time_stream_file2file(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         save(self.recombined_cube, "tmp.nc")
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_stream_file2file(self, n_cubesphere):
+    def tracemalloc_stream_file2file(self, n_cubesphere):
         save(self.recombined_cube, "tmp.nc")
diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py
index 4cfda05ad1..e227da0ec6 100644
--- a/benchmarks/benchmarks/regridding.py
+++ b/benchmarks/benchmarks/regridding.py
@@ -14,8 +14,6 @@
 from iris.analysis import AreaWeighted, PointInCell
 from iris.coords import AuxCoord
 
-from . import TrackAddedMemoryAllocation
-
 
 class HorizontalChunkedRegridding:
     def setup(self) -> None:
@@ -53,20 +51,22 @@ def time_regrid_area_w_new_grid(self) -> None:
         # Realise data
         out.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_regrid_area_w(self) -> None:
+    def tracemalloc_regrid_area_w(self) -> None:
         # Regrid the chunked cube
         out = self.cube.regrid(self.template_cube, self.scheme_area_w)
         # Realise data
         out.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_regrid_area_w_new_grid(self) -> None:
+    tracemalloc_regrid_area_w.number = 3  # type: ignore[attr-defined]
+
+    def tracemalloc_regrid_area_w_new_grid(self) -> None:
         # Regrid the chunked cube
         out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w)
         # Realise data
         out.data
 
+    tracemalloc_regrid_area_w_new_grid.number = 3  # type: ignore[attr-defined]
+
 
 class CurvilinearRegridding:
     def setup(self) -> None:
@@ -110,9 +110,10 @@ def time_regrid_pic(self) -> None:
         # Realise the data
         out.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_regrid_pic(self) -> None:
+    def tracemalloc_regrid_pic(self) -> None:
         # Regrid the cube onto the template.
         out = self.cube.regrid(self.template_cube, self.scheme_pic)
         # Realise the data
         out.data
+
+    tracemalloc_regrid_pic.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/save.py b/benchmarks/benchmarks/save.py
index aaa8480d64..4bac1b1450 100644
--- a/benchmarks/benchmarks/save.py
+++ b/benchmarks/benchmarks/save.py
@@ -7,7 +7,6 @@
 from iris import save
 from iris.mesh import save_mesh
 
-from . import TrackAddedMemoryAllocation, on_demand_benchmark
 from .generate_data.ugrid import make_cube_like_2d_cubesphere
 
 
@@ -38,8 +37,7 @@ def time_netcdf_save_mesh(self, n_cubesphere, is_unstructured):
         if is_unstructured:
             self._save_mesh(self.cube)
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_netcdf_save(self, n_cubesphere, is_unstructured):
+    def tracemalloc_netcdf_save(self, n_cubesphere, is_unstructured):
         # Don't need to copy the cube here since track_ benchmarks don't
         # do repeats between self.setup() calls.
         self._save_data(self.cube, do_copy=False)
diff --git a/benchmarks/benchmarks/sperf/combine_regions.py b/benchmarks/benchmarks/sperf/combine_regions.py
index b106befcae..591b7bb9be 100644
--- a/benchmarks/benchmarks/sperf/combine_regions.py
+++ b/benchmarks/benchmarks/sperf/combine_regions.py
@@ -12,7 +12,7 @@
 from iris import load, load_cube, save
 from iris.mesh.utils import recombine_submeshes
 
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
 from ..generate_data.ugrid import BENCHMARK_DATA, make_cube_like_2d_cubesphere
 
 
@@ -175,8 +175,7 @@ def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=False):
     def time_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_create_combined_cube(self, n_cubesphere):
+    def tracemalloc_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
 
@@ -187,8 +186,7 @@ class ComputeRealData(Mixin):
     def time_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_compute_data(self, n_cubesphere):
+    def tracemalloc_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
 
@@ -206,8 +204,7 @@ def time_save(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         self.save_recombined_cube()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save(self, n_cubesphere):
+    def tracemalloc_save(self, n_cubesphere):
         self.save_recombined_cube()
 
     def track_filesize_saved(self, n_cubesphere):
@@ -233,6 +230,5 @@ def time_stream_file2file(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         self.save_recombined_cube()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_stream_file2file(self, n_cubesphere):
+    def tracemalloc_stream_file2file(self, n_cubesphere):
         self.save_recombined_cube()
diff --git a/benchmarks/benchmarks/sperf/save.py b/benchmarks/benchmarks/sperf/save.py
index d8a03798f0..a715ec2424 100644
--- a/benchmarks/benchmarks/sperf/save.py
+++ b/benchmarks/benchmarks/sperf/save.py
@@ -9,7 +9,7 @@
 from iris import save
 from iris.mesh import save_mesh
 
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
 from ..generate_data.ugrid import make_cube_like_2d_cubesphere
 
 
@@ -36,8 +36,7 @@ def _save_mesh(self, cube):
     def time_save_cube(self, n_cubesphere, is_unstructured):
         self._save_cube(self.cube)
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save_cube(self, n_cubesphere, is_unstructured):
+    def tracemalloc_save_cube(self, n_cubesphere, is_unstructured):
         self._save_cube(self.cube)
 
     def time_save_mesh(self, n_cubesphere, is_unstructured):
diff --git a/benchmarks/benchmarks/stats.py b/benchmarks/benchmarks/stats.py
index 1f5262bf4c..fbab12cd4b 100644
--- a/benchmarks/benchmarks/stats.py
+++ b/benchmarks/benchmarks/stats.py
@@ -8,8 +8,6 @@
 from iris.analysis.stats import pearsonr
 import iris.tests
 
-from . import TrackAddedMemoryAllocation
-
 
 class PearsonR:
     def setup(self):
@@ -32,10 +30,11 @@ def setup(self):
     def time_real(self):
         pearsonr(self.cube_a, self.cube_b, weights=self.weights)
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_real(self):
+    def tracemalloc_real(self):
         pearsonr(self.cube_a, self.cube_b, weights=self.weights)
 
+    tracemalloc_real.number = 3  # type: ignore[attr-defined]
+
     def time_lazy(self):
         for cube in self.cube_a, self.cube_b:
             cube.data = cube.lazy_data()
@@ -43,10 +42,11 @@ def time_lazy(self):
         result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
         result.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_lazy(self):
+    def tracemalloc_lazy(self):
         for cube in self.cube_a, self.cube_b:
             cube.data = cube.lazy_data()
 
         result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
         result.data
+
+    tracemalloc_lazy.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/trajectory.py b/benchmarks/benchmarks/trajectory.py
index a31552eb9a..77825ef2f2 100644
--- a/benchmarks/benchmarks/trajectory.py
+++ b/benchmarks/benchmarks/trajectory.py
@@ -13,8 +13,6 @@
 import iris
 from iris.analysis.trajectory import interpolate
 
-from . import TrackAddedMemoryAllocation
-
 
 class TrajectoryInterpolation:
     def setup(self) -> None:
@@ -35,22 +33,24 @@ def time_trajectory_linear(self) -> None:
         # Realise the data
         out_cube.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_trajectory_linear(self) -> None:
+    def tracemalloc_trajectory_linear(self) -> None:
         # Regrid the cube onto the template.
         out_cube = interpolate(self.cube, self.sample_points, method="linear")
         # Realise the data
         out_cube.data
 
+    tracemalloc_trajectory_linear.number = 3  # type: ignore[attr-defined]
+
     def time_trajectory_nearest(self) -> None:
         # Regrid the cube onto the template.
         out_cube = interpolate(self.cube, self.sample_points, method="nearest")
         # Realise the data
         out_cube.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_trajectory_nearest(self) -> None:
+    def tracemalloc_trajectory_nearest(self) -> None:
         # Regrid the cube onto the template.
         out_cube = interpolate(self.cube, self.sample_points, method="nearest")
         # Realise the data
         out_cube.data
+
+    tracemalloc_trajectory_nearest.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/custom_bms/README.md b/benchmarks/custom_bms/README.md
new file mode 100644
index 0000000000..eea85d74fe
--- /dev/null
+++ b/benchmarks/custom_bms/README.md
@@ -0,0 +1,11 @@
+# Iris custom benchmarks
+
+To be recognised by ASV, these benchmarks must be packaged and installed in
+line with the
+[ASV guidelines](https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html).
+This is achieved using the custom build in [install.py](./install.py).
+
+Installation is into the environment where the benchmarks are run (i.e. not
+the environment containing ASV + Nox, but the one built to the same
+specifications as the Tests environment). This is done via `build_command`
+in [asv.conf.json](../asv.conf.json).
diff --git a/benchmarks/custom_bms/install.py b/benchmarks/custom_bms/install.py
new file mode 100644
index 0000000000..59d27a0b43
--- /dev/null
+++ b/benchmarks/custom_bms/install.py
@@ -0,0 +1,55 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Install Iris' custom benchmarks for detection by ASV.
+
+See the requirements for being detected as an ASV plugin:
+https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html
+"""
+
+from pathlib import Path
+import shutil
+from subprocess import run
+from tempfile import TemporaryDirectory
+
+this_dir = Path(__file__).parent
+
+
+def package_files(new_dir: Path) -> None:
+    """Package Iris' custom benchmarks for detection by ASV.
+
+    Parameters
+    ----------
+    new_dir : Path
+        The directory to package the custom benchmarks in.
+    """
+    asv_bench_iris = new_dir / "asv_bench_iris"
+    benchmarks = asv_bench_iris / "benchmarks"
+    benchmarks.mkdir(parents=True)
+    (asv_bench_iris / "__init__.py").touch()
+
+    for py_file in this_dir.glob("*.py"):
+        if py_file != Path(__file__):
+            shutil.copy2(py_file, benchmarks)
+
+    # Create this on the fly, as having multiple pyproject.toml files in 1
+    # project causes problems.
+    py_project = new_dir / "pyproject.toml"
+    py_project.write_text(
+        """
+        [project]
+        name = "asv_bench_iris"
+        version = "0.1"
+        """
+    )
+
+
+def main():
+    with TemporaryDirectory() as temp_dir:
+        package_files(Path(temp_dir))
+        run(["python", "-m", "pip", "install", temp_dir])
+
+
+if __name__ == "__main__":
+    main()
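(Aside, not part of the patch: a hypothetical smoke test for the install step. After `install.py` has run inside the benchmark environment, the plugin package it builds should be importable from site-packages rather than from the repository checkout.)

    # Hypothetical check, assuming install.py has already run in this
    # environment; the package name comes from the pyproject.toml above.
    import asv_bench_iris

    print(asv_bench_iris.__file__)  # expect a site-packages path
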
diff --git a/benchmarks/custom_bms/tracemallocbench.py b/benchmarks/custom_bms/tracemallocbench.py
new file mode 100644
index 0000000000..486c67aeb9
--- /dev/null
+++ b/benchmarks/custom_bms/tracemallocbench.py
@@ -0,0 +1,196 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+
+"""Benchmark for growth in process resident memory, repeating for accuracy.
+
+Uses a modified version of the repeat logic in
+:class:`asv_runner.benchmarks.time.TimeBenchmark`.
+"""
+
+import re
+from timeit import Timer
+import tracemalloc
+from typing import Callable
+
+from asv_runner.benchmarks.time import TimeBenchmark, wall_timer
+
+
+class TracemallocBenchmark(TimeBenchmark):
+    """Benchmark for growth in process resident memory, repeating for accuracy.
+
+    Obviously limited as to what it actually measures : Relies on the current
+    process not having significant unused (de-allocated) memory when the
+    tested codeblock runs, and only reliable when the code allocates a
+    significant amount of new memory.
+
+    Benchmark operations prefixed with ``tracemalloc_`` or ``Tracemalloc`` will
+    use this benchmark class.
+
+    Inherits behaviour from :class:`asv_runner.benchmarks.time.TimeBenchmark`,
+    with modifications for memory measurement. See the below Attributes section
+    and https://asv.readthedocs.io/en/stable/writing_benchmarks.html#timing-benchmarks.
+
+    Attributes
+    ----------
+    Mostly identical to :class:`asv_runner.benchmarks.time.TimeBenchmark`. See
+    https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks
+    Make sure to use the inherited ``repeat`` attribute if greater accuracy
+    is needed. Below are the attributes where inherited behaviour is
+    overridden.
+
+    number : int
+        The number of times the benchmarked operation will be called per
+        ``repeat``. Memory growth is measured after ALL calls -
+        i.e. `number` should make no difference to the result if the operation
+        has perfect garbage collection. The parent class's intelligent
+        modification of `number` is NOT inherited. A minimum value of ``1`` is
+        enforced.
+    warmup_time, sample_time, min_run_count, timer
+        Not used.
+    type : str = "tracemalloc"
+        The name of this benchmark type.
+    unit : str = "bytes"
+        The units of the measured metric (i.e. the growth in memory).
+
+    """
+
+    name_regex = re.compile("^(Tracemalloc[A-Z_].+)|(tracemalloc_.+)$")
+
+    param: tuple
+
+    def __init__(self, name: str, func: Callable, attr_sources: list) -> None:
+        """Initialize a new instance of `TracemallocBenchmark`.
+
+        Parameters
+        ----------
+        name : str
+            The name of the benchmark.
+        func : callable
+            The function to benchmark.
+        attr_sources : list
+            A list of objects from which to draw attributes.
+        """
+        super().__init__(name, func, attr_sources)
+        self.type = "tracemalloc"
+        self.unit = "bytes"
+
+    def _load_vars(self):
+        """Load benchmark variables from attribute sources.
+
+        Downstream handling of ``number`` is not the same as in the parent, so
+        need to make sure it is at least 1.
+        """
+        super()._load_vars()
+        self.number = max(1, self.number)
+
+    def run(self, *param: tuple) -> dict:
+        """Run the benchmark with the given parameters.
+
+        Downstream handling of ``param`` is not the same as in the parent, so
+        need to store it now.
+
+        Parameters
+        ----------
+        *param : tuple
+            The parameters to pass to the benchmark function.
+
+        Returns
+        -------
+        dict
+            A dictionary with the benchmark results. It contains the samples
+            taken, and "the number of times the function was called in each
+            sample" - for this benchmark that is always ``1`` to avoid the
+            parent class incorrectly modifying the results.
+        """
+        self.param = param
+        return super().run(*param)
+
+    def benchmark_timing(
+        self,
+        timer: Timer,
+        min_repeat: int,
+        max_repeat: int,
+        max_time: float,
+        warmup_time: float,
+        number: int,
+        min_run_count: int,
+    ) -> tuple[list[int], int]:
+        """Benchmark the timing of the function execution.
+
+        Heavily modified from the parent method
+        - Directly performs setup and measurement (parent used timeit).
+        - `number` used differently (see Parameters).
+        - No warmup phase.
+
+        Parameters
+        ----------
+        timer : timeit.Timer
+            Not used.
+        min_repeat : int
+            The minimum number of times to repeat the function execution.
+        max_repeat : int
+            The maximum number of times to repeat the function execution.
+        max_time : float
+            The maximum total time to spend on the benchmarking.
+        warmup_time : float
+            Not used.
+        number : int
+            The number of times the benchmarked operation will be called per
+            repeat. Memory growth is measured after ALL calls - i.e. `number`
+            should make no difference to the result if the operation
+            has perfect garbage collection. The parent class's intelligent
+            modification of `number` is NOT inherited.
+        min_run_count : int
+            Not used.
+
+        Returns
+        -------
+        list
+            A list of the measured memory growths, in bytes.
+        int = 1
+            Part of the inherited return signature. Must be 1 to avoid
+            the parent incorrectly modifying the results.
+        """
+        start_time = wall_timer()
+        samples: list[int] = []
+
+        def too_slow(num_samples) -> bool:
+            """Stop taking samples if limits exceeded.
+
+            Parameters
+            ----------
+            num_samples : int
+                The number of samples taken so far.
+
+            Returns
+            -------
+            bool
+                True if the benchmark should stop, False otherwise.
+            """
+            if num_samples < min_repeat:
+                return False
+            return wall_timer() > start_time + max_time
+
+        # Collect samples
+        while len(samples) < max_repeat:
+            self.redo_setup()
+            tracemalloc.start()
+            for _ in range(number):
+                __ = self.func(*self.param)
+            _, peak_mem_bytes = tracemalloc.get_traced_memory()
+            tracemalloc.stop()
+
+            samples.append(peak_mem_bytes)
+
+            if too_slow(len(samples)):
+                break
+
+        # ``number`` is not used in the same way as in the parent class. Must
+        # be returned as 1 to avoid parent incorrectly modifying the results.
+        return samples, 1
+
+
+# https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html
+export_as_benchmark = [TracemallocBenchmark]
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index efe9fc621c..5bc24b08dc 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -75,7 +75,9 @@ This document explains the changes made to Iris for this release
 💼 Internal
 ===========
 
-#. N/A
+#. `@trexfeathers`_ improved the new ``tracemalloc`` benchmarking (introduced
+   in Iris v3.10.0, :pull:`5948`) to use the same statistical repeat strategy
+   as timing benchmarks. (:pull:`5981`)
 
 
 .. comment
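(Aside, not part of the patch: a standalone sketch of the measurement taken for one repeat in `benchmark_timing` above. The recorded sample is tracemalloc's peak traced allocation across all `number` calls within that repeat; `np.zeros` is a hypothetical stand-in workload.)

    # Standalone illustration (no ASV required) of one repeat's sample.
    import tracemalloc

    import numpy as np

    def func():
        return np.zeros((1000, 1000))

    number = 3
    tracemalloc.start()
    for _ in range(number):
        __ = func()
    _, peak_mem_bytes = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    print(f"one sample: {peak_mem_bytes} bytes")  # one entry in ``samples``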