From 5cc6737c0d1e5a3d9b0c258874cfbf959dc4205e Mon Sep 17 00:00:00 2001
From: Martin Yeo <40734014+trexfeathers@users.noreply.github.com>
Date: Fri, 30 Aug 2024 14:10:26 +0100
Subject: [PATCH] Repeating Tracemalloc Benchmark for accuracy (#5981)

* PoC custom benchmark.

* Working tracemalloc subclass.

* Docstrings and comments.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Finalised code.

* Make custom benchmarks installable.

* Experiment.

* Re-write following testing.

* Replace pyproject with setup.py.

* Testing.

* Make testing smaller.

* Better testing.

* Better testing 2.

* Better testing 3.

* Better testing 4.

* Better testing 5.

* Remove testing.

* Remove tracemalloc decorator.

* Docs tidy-up.

* Restructure and use custom install script.

* Temporary quick demo.

* Revert "Temporary quick demo."

This reverts commit 9dffa1bec548d8a73379ca62ab5b76f2e4fb3c2e.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 benchmarks/README.md                          |   6 +
 benchmarks/asv.conf.json                      |   7 +-
 benchmarks/benchmarks/__init__.py             | 105 ----------
 benchmarks/benchmarks/cperf/save.py           |   5 +-
 benchmarks/benchmarks/merge_concat.py         |  11 +-
 .../benchmarks/mesh/utils/regions_combine.py  |  13 +-
 benchmarks/benchmarks/regridding.py           |  17 +-
 benchmarks/benchmarks/save.py                 |   4 +-
 .../benchmarks/sperf/combine_regions.py       |  14 +-
 benchmarks/benchmarks/sperf/save.py           |   5 +-
 benchmarks/benchmarks/stats.py                |  12 +-
 benchmarks/benchmarks/trajectory.py           |  12 +-
 benchmarks/custom_bms/README.md               |  11 +
 benchmarks/custom_bms/install.py              |  55 +++++
 benchmarks/custom_bms/tracemallocbench.py     | 196 ++++++++++++++++++
 docs/src/whatsnew/latest.rst                  |   4 +-
 16 files changed, 317 insertions(+), 160 deletions(-)
 create mode 100644 benchmarks/custom_bms/README.md
 create mode 100644 benchmarks/custom_bms/install.py
 create mode 100644 benchmarks/custom_bms/tracemallocbench.py

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 49168e7281..911d5f7833 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -118,6 +118,12 @@ repeats _between_ `setup()` calls using the `repeat` attribute.
 `warmup_time = 0` is also advisable since ASV performs independent re-runs to
 estimate run-time, and these will still be subject to the original problem.
 
+### Custom benchmarks
+
+Iris benchmarking implements custom benchmark types, such as a `tracemalloc`
+benchmark to measure memory growth. See [custom_bms/](./custom_bms) for more
+detail.
+
 ### Scaling / non-Scaling Performance Differences
 
 **(We no longer advocate the below for benchmarks run during CI, given the
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 13e7256b83..2857c90ad7 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -53,9 +53,12 @@
     "command_comment": [
         "We know that the Nox command takes care of installation in each",
        "environment, and in the case of Iris no specialised uninstall or",
-        "build commands are needed to get it working."
+        "build commands are needed to get it working.",
+
+        "We do however need to install the custom benchmarks for them to be",
+        "usable."
     ],
     "install_command": [],
     "uninstall_command": [],
-    "build_command": []
+    "build_command": ["python {conf_dir}/custom_bms/install.py"]
 }
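(Aside, not part of the patch: the `build_command` entry above is what gets the custom benchmarks installed. ASV substitutes `{conf_dir}` with the directory containing `asv.conf.json` and runs the command when building each benchmark environment. A rough Python sketch of that behaviour follows; the checkout path is hypothetical, and ASV's real build step also handles caching and wheel building.)

    # Illustrative sketch only: approximates what ASV does with the
    # "build_command" entry above; not ASV's actual implementation.
    import subprocess

    conf_dir = "/path/to/iris/benchmarks"  # hypothetical asv.conf.json location
    command = f"python {conf_dir}/custom_bms/install.py"
    subprocess.run(command.split(), check=True)  # runs in the benchmark env
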
], "install_command": [], "uninstall_command": [], - "build_command": [] + "build_command": ["python {conf_dir}/custom_bms/install.py"] } diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 378c26332d..30a991a879 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -37,111 +37,6 @@ def disable_repeat_between_setup(benchmark_object): return benchmark_object -class TrackAddedMemoryAllocation: - """Measures by how much process resident memory grew, during execution. - - Context manager which measures by how much process resident memory grew, - during execution of its enclosed code block. - - Obviously limited as to what it actually measures : Relies on the current - process not having significant unused (de-allocated) memory when the - tested codeblock runs, and only reliable when the code allocates a - significant amount of new memory. - - Example: - with TrackAddedMemoryAllocation() as mb: - initial_call() - other_call() - result = mb.addedmem_mb() - - Attributes - ---------- - RESULT_MINIMUM_MB : float - The smallest result that should ever be returned, in Mb. Results - fluctuate from run to run (usually within 1Mb) so if a result is - sufficiently small this noise will produce a before-after ratio over - AVD's detection threshold and be treated as 'signal'. Results - smaller than this value will therefore be returned as equal to this - value, ensuring fractionally small noise / no noise at all. - Defaults to 1.0 - - RESULT_ROUND_DP : int - Number of decimal places of rounding on result values (in Mb). - Defaults to 1 - - """ - - RESULT_MINIMUM_MB = 0.2 - RESULT_ROUND_DP = 1 # I.E. to nearest 0.1 Mb - - def __enter__(self): - tracemalloc.start() - return self - - def __exit__(self, *_): - _, peak_mem_bytes = tracemalloc.get_traced_memory() - tracemalloc.stop() - # Save peak-memory allocation, scaled from bytes to Mb. - self._peak_mb = peak_mem_bytes * (2.0**-20) - - def addedmem_mb(self): - """Return measured memory growth, in Mb.""" - result = self._peak_mb - # Small results are too vulnerable to noise being interpreted as signal. - result = max(self.RESULT_MINIMUM_MB, result) - # Rounding makes results easier to read. - result = np.round(result, self.RESULT_ROUND_DP) - return result - - @staticmethod - def decorator(decorated_func): - """Benchmark to track growth in resident memory during execution. - - Intended for use on ASV ``track_`` benchmarks. Applies the - :class:`TrackAddedMemoryAllocation` context manager to the benchmark - code, sets the benchmark ``unit`` attribute to ``Mb``. - - """ - - def _wrapper(*args, **kwargs): - assert decorated_func.__name__[:6] == "track_" - # Run the decorated benchmark within the added memory context - # manager. - with TrackAddedMemoryAllocation() as mb: - decorated_func(*args, **kwargs) - return mb.addedmem_mb() - - decorated_func.unit = "Mb" - return _wrapper - - @staticmethod - def decorator_repeating(repeats=3): - """Benchmark to track growth in resident memory during execution. - - Tracks memory for repeated calls of decorated function. - - Intended for use on ASV ``track_`` benchmarks. Applies the - :class:`TrackAddedMemoryAllocation` context manager to the benchmark - code, sets the benchmark ``unit`` attribute to ``Mb``. - - """ - - def decorator(decorated_func): - def _wrapper(*args, **kwargs): - assert decorated_func.__name__[:6] == "track_" - # Run the decorated benchmark within the added memory context - # manager. 
-                with TrackAddedMemoryAllocation() as mb:
-                    for _ in range(repeats):
-                        decorated_func(*args, **kwargs)
-                return mb.addedmem_mb()
-
-            decorated_func.unit = "Mb"
-            return _wrapper
-
-        return decorator
-
-
 def on_demand_benchmark(benchmark_object):
     """Disable these benchmark(s) unless ON_DEMAND_BENCHARKS env var is set.
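(Aside, not part of the patch: the migration pattern applied throughout the files below, as a hypothetical minimal benchmark. The old decorated `track_` method becomes a plain `tracemalloc_`-prefixed method, with the repeat count moved into the standard ASV `number` attribute; `np.zeros` stands in for any memory-allocating operation.)

    # Hypothetical migration example. Old style, removed above:
    #
    #     @TrackAddedMemoryAllocation.decorator_repeating()
    #     def track_mem_allocate(self):
    #         _ = np.zeros(self.n)
    #
    # New style: the tracemalloc_ name prefix selects the custom benchmark
    # type added by this patch, and ``number`` requests 3 calls per repeat.
    import numpy as np

    class ExampleBenchmark:
        def setup(self):
            self.n = 10_000_000

        def tracemalloc_allocate(self):
            _ = np.zeros(self.n)

        tracemalloc_allocate.number = 3  # type: ignore[attr-defined]
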
diff --git a/benchmarks/benchmarks/cperf/save.py b/benchmarks/benchmarks/cperf/save.py
index 2d60f920c4..6dcd0b3bcf 100644
--- a/benchmarks/benchmarks/cperf/save.py
+++ b/benchmarks/benchmarks/cperf/save.py
@@ -6,7 +6,7 @@
 
 from iris import save
 
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
 from ..generate_data.ugrid import make_cube_like_2d_cubesphere, make_cube_like_umfield
 from . import _N_CUBESPHERE_UM_EQUIVALENT, _UM_DIMS_YX
 
@@ -36,6 +36,5 @@ def _save_data(self, cube):
     def time_save_data_netcdf(self, data_type):
         self._save_data(self.cube)
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save_data_netcdf(self, data_type):
+    def tracemalloc_save_data_netcdf(self, data_type):
         self._save_data(self.cube)
diff --git a/benchmarks/benchmarks/merge_concat.py b/benchmarks/benchmarks/merge_concat.py
index 1a18f92ce9..0bb4096e6c 100644
--- a/benchmarks/benchmarks/merge_concat.py
+++ b/benchmarks/benchmarks/merge_concat.py
@@ -8,7 +8,6 @@
 
 from iris.cube import CubeList
 
-from . import TrackAddedMemoryAllocation
 from .generate_data.stock import realistic_4d_w_everything
 
 
@@ -34,10 +33,11 @@ def setup(self):
     def time_merge(self):
         _ = self.cube_list.merge_cube()
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_merge(self):
+    def tracemalloc_merge(self):
         _ = self.cube_list.merge_cube()
 
+    tracemalloc_merge.number = 3  # type: ignore[attr-defined]
+
 
 class Concatenate:
     # TODO: Improve coverage.
@@ -56,6 +56,7 @@ def setup(self):
     def time_concatenate(self):
         _ = self.cube_list.concatenate_cube()
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_merge(self):
+    def tracemalloc_concatenate(self):
         _ = self.cube_list.concatenate_cube()
+
+    tracemalloc_concatenate.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/mesh/utils/regions_combine.py b/benchmarks/benchmarks/mesh/utils/regions_combine.py
index 1a1a43a622..a61deea56d 100644
--- a/benchmarks/benchmarks/mesh/utils/regions_combine.py
+++ b/benchmarks/benchmarks/mesh/utils/regions_combine.py
@@ -17,7 +17,6 @@
 from iris import load, load_cube, save
 from iris.mesh.utils import recombine_submeshes
 
-from ... import TrackAddedMemoryAllocation
 from ...generate_data.ugrid import make_cube_like_2d_cubesphere
 
 
@@ -169,8 +168,7 @@ def setup(self, n_cubesphere):
     def time_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_create_combined_cube(self, n_cubesphere):
+    def tracemalloc_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
 
@@ -180,8 +178,7 @@ class CombineRegionsComputeRealData(MixinCombineRegions):
     def time_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_compute_data(self, n_cubesphere):
+    def tracemalloc_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
 
@@ -199,8 +196,7 @@ def time_save(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         save(self.recombined_cube, "tmp.nc")
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save(self, n_cubesphere):
+    def tracemalloc_save(self, n_cubesphere):
         save(self.recombined_cube, "tmp.nc")
 
     def track_filesize_saved(self, n_cubesphere):
@@ -227,6 +223,5 @@ def time_stream_file2file(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         save(self.recombined_cube, "tmp.nc")
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_stream_file2file(self, n_cubesphere):
+    def tracemalloc_stream_file2file(self, n_cubesphere):
         save(self.recombined_cube, "tmp.nc")
diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py
index 4cfda05ad1..e227da0ec6 100644
--- a/benchmarks/benchmarks/regridding.py
+++ b/benchmarks/benchmarks/regridding.py
@@ -14,8 +14,6 @@
 from iris.analysis import AreaWeighted, PointInCell
 from iris.coords import AuxCoord
 
-from . import TrackAddedMemoryAllocation
-
 
 class HorizontalChunkedRegridding:
     def setup(self) -> None:
@@ -53,20 +51,22 @@ def time_regrid_area_w_new_grid(self) -> None:
         # Realise data
         out.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_regrid_area_w(self) -> None:
+    def tracemalloc_regrid_area_w(self) -> None:
         # Regrid the chunked cube
         out = self.cube.regrid(self.template_cube, self.scheme_area_w)
         # Realise data
         out.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_regrid_area_w_new_grid(self) -> None:
+    tracemalloc_regrid_area_w.number = 3  # type: ignore[attr-defined]
+
+    def tracemalloc_regrid_area_w_new_grid(self) -> None:
         # Regrid the chunked cube
         out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w)
         # Realise data
         out.data
 
+    tracemalloc_regrid_area_w_new_grid.number = 3  # type: ignore[attr-defined]
+
 
 class CurvilinearRegridding:
     def setup(self) -> None:
@@ -110,9 +110,10 @@ def time_regrid_pic(self) -> None:
         # Realise the data
         out.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_mem_regrid_pic(self) -> None:
+    def tracemalloc_regrid_pic(self) -> None:
         # Regrid the cube onto the template.
         out = self.cube.regrid(self.template_cube, self.scheme_pic)
         # Realise the data
         out.data
+
+    tracemalloc_regrid_pic.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/save.py b/benchmarks/benchmarks/save.py
index aaa8480d64..4bac1b1450 100644
--- a/benchmarks/benchmarks/save.py
+++ b/benchmarks/benchmarks/save.py
@@ -7,7 +7,6 @@
 from iris import save
 from iris.mesh import save_mesh
 
-from . import TrackAddedMemoryAllocation, on_demand_benchmark
 from .generate_data.ugrid import make_cube_like_2d_cubesphere
 
 
@@ -38,8 +37,7 @@ def time_netcdf_save_mesh(self, n_cubesphere, is_unstructured):
         if is_unstructured:
             self._save_mesh(self.cube)
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_netcdf_save(self, n_cubesphere, is_unstructured):
+    def tracemalloc_netcdf_save(self, n_cubesphere, is_unstructured):
         # Don't need to copy the cube here since track_ benchmarks don't
         # do repeats between self.setup() calls.
         self._save_data(self.cube, do_copy=False)
diff --git a/benchmarks/benchmarks/sperf/combine_regions.py b/benchmarks/benchmarks/sperf/combine_regions.py
index b106befcae..591b7bb9be 100644
--- a/benchmarks/benchmarks/sperf/combine_regions.py
+++ b/benchmarks/benchmarks/sperf/combine_regions.py
@@ -12,7 +12,7 @@
 from iris import load, load_cube, save
 from iris.mesh.utils import recombine_submeshes
 
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
 from ..generate_data.ugrid import BENCHMARK_DATA, make_cube_like_2d_cubesphere
 
 
@@ -175,8 +175,7 @@ def setup(self, n_cubesphere, imaginary_data=True, create_result_cube=False):
     def time_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_create_combined_cube(self, n_cubesphere):
+    def tracemalloc_create_combined_cube(self, n_cubesphere):
         self.recombine()
 
 
@@ -187,8 +186,7 @@ class ComputeRealData(Mixin):
     def time_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_compute_data(self, n_cubesphere):
+    def tracemalloc_compute_data(self, n_cubesphere):
         _ = self.recombined_cube.data
 
 
@@ -206,8 +204,7 @@ def time_save(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         self.save_recombined_cube()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save(self, n_cubesphere):
+    def tracemalloc_save(self, n_cubesphere):
         self.save_recombined_cube()
 
     def track_filesize_saved(self, n_cubesphere):
@@ -233,6 +230,5 @@ def time_stream_file2file(self, n_cubesphere):
         # Save to disk, which must compute data + stream it to file.
         self.save_recombined_cube()
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_stream_file2file(self, n_cubesphere):
+    def tracemalloc_stream_file2file(self, n_cubesphere):
         self.save_recombined_cube()
diff --git a/benchmarks/benchmarks/sperf/save.py b/benchmarks/benchmarks/sperf/save.py
index d8a03798f0..a715ec2424 100644
--- a/benchmarks/benchmarks/sperf/save.py
+++ b/benchmarks/benchmarks/sperf/save.py
@@ -9,7 +9,7 @@
 from iris import save
 from iris.mesh import save_mesh
 
-from .. import TrackAddedMemoryAllocation, on_demand_benchmark
+from .. import on_demand_benchmark
 from ..generate_data.ugrid import make_cube_like_2d_cubesphere
 
 
@@ -36,8 +36,7 @@ def _save_mesh(self, cube):
     def time_save_cube(self, n_cubesphere, is_unstructured):
         self._save_cube(self.cube)
 
-    @TrackAddedMemoryAllocation.decorator
-    def track_addedmem_save_cube(self, n_cubesphere, is_unstructured):
+    def tracemalloc_save_cube(self, n_cubesphere, is_unstructured):
         self._save_cube(self.cube)
 
     def time_save_mesh(self, n_cubesphere, is_unstructured):
diff --git a/benchmarks/benchmarks/stats.py b/benchmarks/benchmarks/stats.py
index 1f5262bf4c..fbab12cd4b 100644
--- a/benchmarks/benchmarks/stats.py
+++ b/benchmarks/benchmarks/stats.py
@@ -8,8 +8,6 @@
 from iris.analysis.stats import pearsonr
 import iris.tests
 
-from . import TrackAddedMemoryAllocation
-
 
 class PearsonR:
     def setup(self):
@@ -32,10 +30,11 @@ def setup(self):
     def time_real(self):
         pearsonr(self.cube_a, self.cube_b, weights=self.weights)
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_real(self):
+    def tracemalloc_real(self):
         pearsonr(self.cube_a, self.cube_b, weights=self.weights)
 
+    tracemalloc_real.number = 3  # type: ignore[attr-defined]
+
     def time_lazy(self):
         for cube in self.cube_a, self.cube_b:
             cube.data = cube.lazy_data()
@@ -43,10 +42,11 @@ def time_lazy(self):
         result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
         result.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_lazy(self):
+    def tracemalloc_lazy(self):
         for cube in self.cube_a, self.cube_b:
             cube.data = cube.lazy_data()
 
         result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
         result.data
+
+    tracemalloc_lazy.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/benchmarks/trajectory.py b/benchmarks/benchmarks/trajectory.py
index a31552eb9a..77825ef2f2 100644
--- a/benchmarks/benchmarks/trajectory.py
+++ b/benchmarks/benchmarks/trajectory.py
@@ -13,8 +13,6 @@
 import iris
 from iris.analysis.trajectory import interpolate
 
-from . import TrackAddedMemoryAllocation
-
 
 class TrajectoryInterpolation:
     def setup(self) -> None:
@@ -35,22 +33,24 @@ def time_trajectory_linear(self) -> None:
         # Realise the data
         out_cube.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_trajectory_linear(self) -> None:
+    def tracemalloc_trajectory_linear(self) -> None:
         # Regrid the cube onto the template.
         out_cube = interpolate(self.cube, self.sample_points, method="linear")
         # Realise the data
         out_cube.data
 
+    tracemalloc_trajectory_linear.number = 3  # type: ignore[attr-defined]
+
     def time_trajectory_nearest(self) -> None:
         # Regrid the cube onto the template.
         out_cube = interpolate(self.cube, self.sample_points, method="nearest")
         # Realise the data
         out_cube.data
 
-    @TrackAddedMemoryAllocation.decorator_repeating()
-    def track_trajectory_nearest(self) -> None:
+    def tracemalloc_trajectory_nearest(self) -> None:
         # Regrid the cube onto the template.
         out_cube = interpolate(self.cube, self.sample_points, method="nearest")
         # Realise the data
         out_cube.data
+
+    tracemalloc_trajectory_nearest.number = 3  # type: ignore[attr-defined]
diff --git a/benchmarks/custom_bms/README.md b/benchmarks/custom_bms/README.md
new file mode 100644
index 0000000000..eea85d74fe
--- /dev/null
+++ b/benchmarks/custom_bms/README.md
@@ -0,0 +1,11 @@
+# Iris custom benchmarks
+
+To be recognised by ASV, these benchmarks must be packaged and installed in
+line with the
+[ASV guidelines](https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html).
+This is achieved using the custom build in [install.py](./install.py).
+
+Installation is into the environment where the benchmarks are run (i.e. not
+the environment containing ASV + Nox, but the one built to the same
+specifications as the Tests environment). This is done via `build_command`
+in [asv.conf.json](../asv.conf.json).
diff --git a/benchmarks/custom_bms/install.py b/benchmarks/custom_bms/install.py
new file mode 100644
index 0000000000..59d27a0b43
--- /dev/null
+++ b/benchmarks/custom_bms/install.py
@@ -0,0 +1,55 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Install Iris' custom benchmarks for detection by ASV.
+
+See the requirements for being detected as an ASV plugin:
+https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html
+"""
+
+from pathlib import Path
+import shutil
+from subprocess import run
+from tempfile import TemporaryDirectory
+
+this_dir = Path(__file__).parent
+
+
+def package_files(new_dir: Path) -> None:
+    """Package Iris' custom benchmarks for detection by ASV.
+
+    Parameters
+    ----------
+    new_dir : Path
+        The directory to package the custom benchmarks in.
+    """
+    asv_bench_iris = new_dir / "asv_bench_iris"
+    benchmarks = asv_bench_iris / "benchmarks"
+    benchmarks.mkdir(parents=True)
+    (asv_bench_iris / "__init__.py").touch()
+
+    for py_file in this_dir.glob("*.py"):
+        if py_file != Path(__file__):
+            shutil.copy2(py_file, benchmarks)
+
+    # Create this on the fly, as having multiple pyproject.toml files in 1
+    # project causes problems.
+    py_project = new_dir / "pyproject.toml"
+    py_project.write_text(
+        """
+        [project]
+        name = "asv_bench_iris"
+        version = "0.1"
+        """
+    )
+
+
+def main():
+    with TemporaryDirectory() as temp_dir:
+        package_files(Path(temp_dir))
+        run(["python", "-m", "pip", "install", temp_dir])
+
+
+if __name__ == "__main__":
+    main()
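(Aside, not part of the patch: a hypothetical smoke test for the install step. After `install.py` has run inside the benchmark environment, the plugin package it builds should be importable from site-packages rather than from the repository checkout.)

    # Hypothetical check, assuming install.py has already run in this
    # environment; the package name comes from the pyproject.toml above.
    import asv_bench_iris

    print(asv_bench_iris.__file__)  # expect a site-packages path
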
diff --git a/benchmarks/custom_bms/tracemallocbench.py b/benchmarks/custom_bms/tracemallocbench.py
new file mode 100644
index 0000000000..486c67aeb9
--- /dev/null
+++ b/benchmarks/custom_bms/tracemallocbench.py
@@ -0,0 +1,196 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+
+"""Benchmark for growth in process resident memory, repeating for accuracy.
+
+Uses a modified version of the repeat logic in
+:class:`asv_runner.benchmarks.time.TimeBenchmark`.
+"""
+
+import re
+from timeit import Timer
+import tracemalloc
+from typing import Callable
+
+from asv_runner.benchmarks.time import TimeBenchmark, wall_timer
+
+
+class TracemallocBenchmark(TimeBenchmark):
+    """Benchmark for growth in process resident memory, repeating for accuracy.
+
+    Obviously limited as to what it actually measures : Relies on the current
+    process not having significant unused (de-allocated) memory when the
+    tested codeblock runs, and only reliable when the code allocates a
+    significant amount of new memory.
+
+    Benchmark operations prefixed with ``tracemalloc_`` or ``Tracemalloc`` will
+    use this benchmark class.
+
+    Inherits behaviour from :class:`asv_runner.benchmarks.time.TimeBenchmark`,
+    with modifications for memory measurement. See the below Attributes section
+    and https://asv.readthedocs.io/en/stable/writing_benchmarks.html#timing-benchmarks.
+
+    Attributes
+    ----------
+    Mostly identical to :class:`asv_runner.benchmarks.time.TimeBenchmark`. See
+    https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks
+    Make sure to use the inherited ``repeat`` attribute if greater accuracy
+    is needed. Below are the attributes where inherited behaviour is
+    overridden.
+
+    number : int
+        The number of times the benchmarked operation will be called per
+        ``repeat``. Memory growth is measured after ALL calls -
+        i.e. `number` should make no difference to the result if the operation
+        has perfect garbage collection. The parent class's intelligent
+        modification of `number` is NOT inherited. A minimum value of ``1`` is
+        enforced.
+    warmup_time, sample_time, min_run_count, timer
+        Not used.
+    type : str = "tracemalloc"
+        The name of this benchmark type.
+    unit : str = "bytes"
+        The units of the measured metric (i.e. the growth in memory).
+
+    """
+
+    name_regex = re.compile("^(Tracemalloc[A-Z_].+)|(tracemalloc_.+)$")
+
+    param: tuple
+
+    def __init__(self, name: str, func: Callable, attr_sources: list) -> None:
+        """Initialize a new instance of `TracemallocBenchmark`.
+
+        Parameters
+        ----------
+        name : str
+            The name of the benchmark.
+        func : callable
+            The function to benchmark.
+        attr_sources : list
+            A list of objects from which to draw attributes.
+        """
+        super().__init__(name, func, attr_sources)
+        self.type = "tracemalloc"
+        self.unit = "bytes"
+
+    def _load_vars(self):
+        """Load benchmark variables from attribute sources.
+
+        Downstream handling of ``number`` is not the same as in the parent, so
+        need to make sure it is at least 1.
+        """
+        super()._load_vars()
+        self.number = max(1, self.number)
+
+    def run(self, *param: tuple) -> dict:
+        """Run the benchmark with the given parameters.
+
+        Downstream handling of ``param`` is not the same as in the parent, so
+        need to store it now.
+
+        Parameters
+        ----------
+        *param : tuple
+            The parameters to pass to the benchmark function.
+
+        Returns
+        -------
+        dict
+            A dictionary with the benchmark results. It contains the samples
+            taken, and "the number of times the function was called in each
+            sample" - for this benchmark that is always ``1`` to avoid the
+            parent class incorrectly modifying the results.
+        """
+        self.param = param
+        return super().run(*param)
+
+    def benchmark_timing(
+        self,
+        timer: Timer,
+        min_repeat: int,
+        max_repeat: int,
+        max_time: float,
+        warmup_time: float,
+        number: int,
+        min_run_count: int,
+    ) -> tuple[list[int], int]:
+        """Benchmark the timing of the function execution.
+
+        Heavily modified from the parent method
+        - Directly performs setup and measurement (parent used timeit).
+        - `number` used differently (see Parameters).
+        - No warmup phase.
+
+        Parameters
+        ----------
+        timer : timeit.Timer
+            Not used.
+        min_repeat : int
+            The minimum number of times to repeat the function execution.
+        max_repeat : int
+            The maximum number of times to repeat the function execution.
+        max_time : float
+            The maximum total time to spend on the benchmarking.
+        warmup_time : float
+            Not used.
+        number : int
+            The number of times the benchmarked operation will be called per
+            repeat. Memory growth is measured after ALL calls - i.e. `number`
+            should make no difference to the result if the operation
+            has perfect garbage collection. The parent class's intelligent
+            modification of `number` is NOT inherited.
+        min_run_count : int
+            Not used.
+
+        Returns
+        -------
+        list
+            A list of the measured memory growths, in bytes.
+        int = 1
+            Part of the inherited return signature. Must be 1 to avoid
+            the parent incorrectly modifying the results.
+        """
+        start_time = wall_timer()
+        samples: list[int] = []
+
+        def too_slow(num_samples) -> bool:
+            """Stop taking samples if limits exceeded.
+
+            Parameters
+            ----------
+            num_samples : int
+                The number of samples taken so far.
+
+            Returns
+            -------
+            bool
+                True if the benchmark should stop, False otherwise.
+            """
+            if num_samples < min_repeat:
+                return False
+            return wall_timer() > start_time + max_time
+
+        # Collect samples
+        while len(samples) < max_repeat:
+            self.redo_setup()
+            tracemalloc.start()
+            for _ in range(number):
+                __ = self.func(*self.param)
+            _, peak_mem_bytes = tracemalloc.get_traced_memory()
+            tracemalloc.stop()
+
+            samples.append(peak_mem_bytes)
+
+            if too_slow(len(samples)):
+                break
+
+        # ``number`` is not used in the same way as in the parent class. Must
+        # be returned as 1 to avoid parent incorrectly modifying the results.
+        return samples, 1
+
+
+# https://asv.readthedocs.io/projects/asv-runner/en/latest/development/benchmark_plugins.html
+export_as_benchmark = [TracemallocBenchmark]
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index efe9fc621c..5bc24b08dc 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -75,7 +75,9 @@ This document explains the changes made to Iris for this release
 💼 Internal
 ===========
 
-#. N/A
+#. `@trexfeathers`_ improved the new ``tracemalloc`` benchmarking (introduced
+   in Iris v3.10.0, :pull:`5948`) to use the same statistical repeat strategy
+   as timing benchmarks. (:pull:`5981`)
 
 
 .. comment
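(Aside, not part of the patch: a standalone sketch of the measurement taken for one repeat in `benchmark_timing` above. The recorded sample is tracemalloc's peak traced allocation across all `number` calls within that repeat; `np.zeros` is a hypothetical stand-in workload.)

    # Standalone illustration (no ASV required) of one repeat's sample.
    import tracemalloc

    import numpy as np

    def func():
        return np.zeros((1000, 1000))

    number = 3
    tracemalloc.start()
    for _ in range(number):
        __ = func()
    _, peak_mem_bytes = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    print(f"one sample: {peak_mem_bytes} bytes")  # one entry in ``samples``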