From 11eeb2393cc82eee13ba3bc6f047fa492cbe7482 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 18 May 2023 13:15:01 -0300 Subject: [PATCH 1/7] implement lru_cache for get_variables_by_attributes --- src/netCDF4/_netCDF4.pyx | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 5b18f89a7..3943c320d 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1226,6 +1226,7 @@ from cpython.bytes cimport PyBytes_FromStringAndSize from .utils import (_StartCountStride, _quantize, _find_dim, _walk_grps, _out_array_shape, _sortbylist, _tostr, _safecast, _is_int) import sys +import functools __version__ = "1.6.3" @@ -2099,7 +2100,7 @@ strings. cdef Py_buffer _buffer cdef public groups, dimensions, variables, disk_format, path, parent,\ file_format, data_model, cmptypes, vltypes, enumtypes, __orthogonal_indexing__, \ - keepweakref, _ncstring_attrs__ + keepweakref, _ncstring_attrs__, get_variables_by_attributes def __init__(self, filename, mode='r', clobber=True, format='NETCDF4', diskless=False, persist=False, keepweakref=False, @@ -2211,6 +2212,10 @@ strings. memset(&self._buffer, 0, sizeof(self._buffer)) + self.get_variables_by_attributes = functools.lru_cache(maxsize=128)( + self._get_variables_by_attributes_uncached, + ) + # flag to indicate that Variables in this Dataset support orthogonal indexing. self.__orthogonal_indexing__ = True if diskless and __netcdf4libversion__ < '4.2.1': @@ -3333,9 +3338,9 @@ of existing (sub-) groups and their variables. for group in groups: group.set_ncstring_attrs(value) # recurse into subgroups... - def get_variables_by_attributes(self, **kwargs): + def _get_variables_by_attributes_uncached(self, **kwargs): """ -**`get_variables_by_attribute(self, **kwargs)`** +**`get_variables_by_attributes(self, **kwargs)`** Returns a list of variables that match specific conditions. @@ -6913,6 +6918,9 @@ Example usage (See `MFDataset.__init__` for more details): else: return Dataset.__getattribute__(self, name) + def get_variables_by_attributes(self, **kwargs): + return Dataset._get_variables_by_attributes_uncached(self, **kwargs) + def ncattrs(self): """ **`ncattrs(self)`** From b739cccfa40e95ce4002444073f3b11730f03c0b Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Mon, 22 May 2023 10:38:28 -0300 Subject: [PATCH 2/7] clear cache --- src/netCDF4/_netCDF4.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 3943c320d..3d7cd8972 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -2622,7 +2622,9 @@ Is the Dataset open or closed? return bool(self._isopen) def __dealloc__(self): - # close file when there are no references to object left + # close file when there are no references to object left and clear the cache. + if self.get_variables_by_attributes: + self.get_variables_by_attributes.cache_clear() if self._isopen: self._close(False) From 91af6c591ec154c6a56dc3ca4e9726715d30918a Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Mon, 22 May 2023 11:27:49 -0300 Subject: [PATCH 3/7] decorate the class method --- src/netCDF4/_netCDF4.pyx | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx index 3d7cd8972..12dc3ec6a 100644 --- a/src/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -2100,7 +2100,7 @@ strings. cdef Py_buffer _buffer cdef public groups, dimensions, variables, disk_format, path, parent,\ file_format, data_model, cmptypes, vltypes, enumtypes, __orthogonal_indexing__, \ - keepweakref, _ncstring_attrs__, get_variables_by_attributes + keepweakref, _ncstring_attrs__ def __init__(self, filename, mode='r', clobber=True, format='NETCDF4', diskless=False, persist=False, keepweakref=False, @@ -2212,10 +2212,6 @@ strings. memset(&self._buffer, 0, sizeof(self._buffer)) - self.get_variables_by_attributes = functools.lru_cache(maxsize=128)( - self._get_variables_by_attributes_uncached, - ) - # flag to indicate that Variables in this Dataset support orthogonal indexing. self.__orthogonal_indexing__ = True if diskless and __netcdf4libversion__ < '4.2.1': @@ -3340,7 +3336,8 @@ of existing (sub-) groups and their variables. for group in groups: group.set_ncstring_attrs(value) # recurse into subgroups... - def _get_variables_by_attributes_uncached(self, **kwargs): + @functools.lru_cache(maxsize=128) + def get_variables_by_attributes(self, **kwargs): """ **`get_variables_by_attributes(self, **kwargs)`** @@ -6920,9 +6917,6 @@ Example usage (See `MFDataset.__init__` for more details): else: return Dataset.__getattribute__(self, name) - def get_variables_by_attributes(self, **kwargs): - return Dataset._get_variables_by_attributes_uncached(self, **kwargs) - def ncattrs(self): """ **`ncattrs(self)`** From 412fbb33071f19d010cc94e27d3a56837401ebc0 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 24 May 2023 10:36:28 -0300 Subject: [PATCH 4/7] opening, calling gba, and closing, multiple times --- test/tst_multiple_open_close.py | 51 +++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 test/tst_multiple_open_close.py diff --git a/test/tst_multiple_open_close.py b/test/tst_multiple_open_close.py new file mode 100644 index 000000000..ea592ed9c --- /dev/null +++ b/test/tst_multiple_open_close.py @@ -0,0 +1,51 @@ +import os +import tracemalloc +import unittest + +import netCDF4 + +class MultipleVariablesByAttributesCallsTests(unittest.TestCase): + + + def test_multiple_calls(self): + netcdf_file = os.path.join(os.path.dirname(__file__), "netcdf_dummy_file.nc") + snapshot = tracemalloc.take_snapshot() + + # k_times = 1000_000 + k_times = 10 + for _k in range(k_times): + nc = netCDF4.Dataset(netcdf_file) + + vs = nc.get_variables_by_attributes(axis='Z') + self.assertEqual(len(vs), 1) + + vs = nc.get_variables_by_attributes(units='m/s') + self.assertEqual(len(vs), 4) + + vs = nc.get_variables_by_attributes(axis='Z', units='m') + self.assertEqual(len(vs), 1) + + vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T']) + self.assertEqual(len(vs), 1) + + vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None) + self.assertEqual(len(vs), 12) + + vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None, long_name=lambda v: v is not None and 'Upward (w) velocity' in v) + self.assertEqual(len(vs), 1) + + vs = nc.get_variables_by_attributes(units='m/s', grid_mapping=lambda v: v is not None) + self.assertEqual(len(vs), 4) + + vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None, long_name='Upward (w) velocity') + self.assertEqual(len(vs), 1) + nc.close() + stats = tracemalloc.take_snapshot().compare_to(snapshot, 'filename') + print("[ Top 10 differences ]") + for stat in stats[:10]: + print(stat) + +if __name__ == '__main__': + tracemalloc.start() + unittest.main() + tracemalloc.stop() From e83c2080157122d8b95a03a1be4498bf9894710e Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 24 May 2023 12:33:28 -0300 Subject: [PATCH 5/7] move inside the function --- test/tst_multiple_open_close.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/tst_multiple_open_close.py b/test/tst_multiple_open_close.py index ea592ed9c..46647d0b3 100644 --- a/test/tst_multiple_open_close.py +++ b/test/tst_multiple_open_close.py @@ -9,9 +9,9 @@ class MultipleVariablesByAttributesCallsTests(unittest.TestCase): def test_multiple_calls(self): netcdf_file = os.path.join(os.path.dirname(__file__), "netcdf_dummy_file.nc") + tracemalloc.start() snapshot = tracemalloc.take_snapshot() - # k_times = 1000_000 k_times = 10 for _k in range(k_times): nc = netCDF4.Dataset(netcdf_file) @@ -41,11 +41,10 @@ def test_multiple_calls(self): self.assertEqual(len(vs), 1) nc.close() stats = tracemalloc.take_snapshot().compare_to(snapshot, 'filename') + tracemalloc.stop() print("[ Top 10 differences ]") for stat in stats[:10]: print(stat) -if __name__ == '__main__': - tracemalloc.start() +if __name__ == '__main__': unittest.main() - tracemalloc.stop() From e08f7bd75f02c2ef03f313fb4d809414c073d0fb Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 24 May 2023 14:12:10 -0300 Subject: [PATCH 6/7] don't run memory leak test --- test/run_all.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/run_all.py b/test/run_all.py index f2ff97a18..199bf0676 100755 --- a/test/run_all.py +++ b/test/run_all.py @@ -57,6 +57,11 @@ test_files.remove('tst_cdl.py'); sys.stdout.write('not running tst_cdl.py ...\n') +# Don't run computationally intensive test +if os.getenv('MEMORY_LEAK_TEST'): + test_files.remove('tst_multiple_open_close.py'); + sys.stdout.write('not running tst_multiple_open_close.py ...\n') + # Build the test suite from the tests found in the test files. testsuite = unittest.TestSuite() for f in test_files: From 60cd3a0016e265e6ac738c2473e0de59a1be8737 Mon Sep 17 00:00:00 2001 From: Filipe Date: Wed, 24 May 2023 18:47:41 -0300 Subject: [PATCH 7/7] fix copy-n-pasta --- test/run_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/run_all.py b/test/run_all.py index 199bf0676..91c4b54ea 100755 --- a/test/run_all.py +++ b/test/run_all.py @@ -58,7 +58,7 @@ sys.stdout.write('not running tst_cdl.py ...\n') # Don't run computationally intensive test -if os.getenv('MEMORY_LEAK_TEST'): +if not os.getenv('MEMORY_LEAK_TEST'): test_files.remove('tst_multiple_open_close.py'); sys.stdout.write('not running tst_multiple_open_close.py ...\n')