-
Notifications
You must be signed in to change notification settings - Fork 263
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement lru_cache for get_variables_by_attributes #1253
Changes from 6 commits
11eeb23
b739ccc
91af6c5
412fbb3
e83c208
e08f7bd
60cd3a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1226,6 +1226,7 @@ from cpython.bytes cimport PyBytes_FromStringAndSize | |
from .utils import (_StartCountStride, _quantize, _find_dim, _walk_grps, | ||
_out_array_shape, _sortbylist, _tostr, _safecast, _is_int) | ||
import sys | ||
import functools | ||
|
||
__version__ = "1.6.3" | ||
|
||
|
@@ -2617,7 +2618,9 @@ Is the Dataset open or closed? | |
return bool(self._isopen) | ||
|
||
def __dealloc__(self): | ||
# close file when there are no references to object left | ||
# close file when there are no references to object left and clear the cache. | ||
if self.get_variables_by_attributes: | ||
self.get_variables_by_attributes.cache_clear() | ||
if self._isopen: | ||
self._close(False) | ||
|
||
|
@@ -3333,9 +3336,10 @@ of existing (sub-) groups and their variables. | |
for group in groups: | ||
group.set_ncstring_attrs(value) # recurse into subgroups... | ||
|
||
@functools.lru_cache(maxsize=128) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding the decorator here makes things simpler but can lead to memory leaks, see python/cpython#64058. However, b/c I could not make the most common fix for this work here and I believe with the cache clear in the What do you think @jswhit? The alternative would be to factor this function out into a private one that takes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ocefpaf I'm OK with this approach unless there are significant memory leaks. I guess you could create a test for memory leaks by repeatedly opening and closing the same file and invoking get_variables_by_attributes multiple times to see what happens to memory usage. |
||
def get_variables_by_attributes(self, **kwargs): | ||
""" | ||
**`get_variables_by_attribute(self, **kwargs)`** | ||
**`get_variables_by_attributes(self, **kwargs)`** | ||
|
||
Returns a list of variables that match specific conditions. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,11 @@ | |
test_files.remove('tst_cdl.py'); | ||
sys.stdout.write('not running tst_cdl.py ...\n') | ||
|
||
# Don't run computationally intensive test | ||
if os.getenv('MEMORY_LEAK_TEST'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we want |
||
test_files.remove('tst_multiple_open_close.py'); | ||
sys.stdout.write('not running tst_multiple_open_close.py ...\n') | ||
|
||
# Build the test suite from the tests found in the test files. | ||
testsuite = unittest.TestSuite() | ||
for f in test_files: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
import tracemalloc | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if I'm using this correctly. I'm out of my depth here. Edit: moving it before the |
||
import unittest | ||
|
||
import netCDF4 | ||
|
||
class MultipleVariablesByAttributesCallsTests(unittest.TestCase): | ||
|
||
|
||
def test_multiple_calls(self): | ||
netcdf_file = os.path.join(os.path.dirname(__file__), "netcdf_dummy_file.nc") | ||
tracemalloc.start() | ||
snapshot = tracemalloc.take_snapshot() | ||
|
||
k_times = 10 | ||
for _k in range(k_times): | ||
nc = netCDF4.Dataset(netcdf_file) | ||
|
||
vs = nc.get_variables_by_attributes(axis='Z') | ||
self.assertEqual(len(vs), 1) | ||
|
||
vs = nc.get_variables_by_attributes(units='m/s') | ||
self.assertEqual(len(vs), 4) | ||
|
||
vs = nc.get_variables_by_attributes(axis='Z', units='m') | ||
self.assertEqual(len(vs), 1) | ||
|
||
vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T']) | ||
self.assertEqual(len(vs), 1) | ||
|
||
vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None) | ||
self.assertEqual(len(vs), 12) | ||
|
||
vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None, long_name=lambda v: v is not None and 'Upward (w) velocity' in v) | ||
self.assertEqual(len(vs), 1) | ||
|
||
vs = nc.get_variables_by_attributes(units='m/s', grid_mapping=lambda v: v is not None) | ||
self.assertEqual(len(vs), 4) | ||
|
||
vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None, long_name='Upward (w) velocity') | ||
self.assertEqual(len(vs), 1) | ||
nc.close() | ||
stats = tracemalloc.take_snapshot().compare_to(snapshot, 'filename') | ||
tracemalloc.stop() | ||
print("[ Top 10 differences ]") | ||
for stat in stats[:10]: | ||
print(stat) | ||
|
||
if __name__ == '__main__': | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we do need this but this alone did not fix the failing test. In fact, the
__dealloc__
test fails when I add a function object in the class__init__
that is a reference to thedef
ined one, no relation to caching or not. I don't know enough about cython to understand what is wrong there.