-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
H5 context management #24
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -7,78 +7,116 @@ | |||||
|
||||||
H5_ACCESS_MODES = ("r", "r+", "w", "w-", "x", "a") | ||||||
|
||||||
H5_DATASET_KWDS = ("name", | ||||||
"shape", | ||||||
"dtype", | ||||||
"data", | ||||||
"chunks", | ||||||
"compression", | ||||||
"compression_opts", | ||||||
"scaleoffset", | ||||||
"shuffle", | ||||||
"fletcher32", | ||||||
"maxshape", | ||||||
"fillvalue", | ||||||
"track_times", | ||||||
"track_order", | ||||||
"external", | ||||||
"allow_unknown_filter") | ||||||
|
||||||
H5_GROUP_KWDS = ("name", | ||||||
"track_order") | ||||||
|
||||||
H5_FILE_KWDS = ("name", | ||||||
"mode", | ||||||
"driver", | ||||||
"libver", | ||||||
"userblock_size", | ||||||
"swmr", | ||||||
"rdcc_nslots", | ||||||
"rdcc_nbytes", | ||||||
"rdcc_w0", | ||||||
"track_order", | ||||||
"fs_strategy", | ||||||
"fs_persist", | ||||||
"fs_threshold") | ||||||
H5_DATASET_KWDS = ( | ||||||
"name", | ||||||
"shape", | ||||||
"dtype", | ||||||
"data", | ||||||
"chunks", | ||||||
"compression", | ||||||
"compression_opts", | ||||||
"scaleoffset", | ||||||
"shuffle", | ||||||
"fletcher32", | ||||||
"maxshape", | ||||||
"fillvalue", | ||||||
"track_times", | ||||||
"track_order", | ||||||
"dcpl", | ||||||
"external", | ||||||
"allow_unknown_filter", | ||||||
) | ||||||
|
||||||
H5_GROUP_KWDS = ("name", "track_order") | ||||||
|
||||||
H5_FILE_KWDS = ( | ||||||
"name", | ||||||
"mode", | ||||||
"driver", | ||||||
"libver", | ||||||
"userblock_size", | ||||||
"swmr", | ||||||
"rdcc_nslots", | ||||||
"rdcc_nbytes", | ||||||
"rdcc_w0", | ||||||
"track_order", | ||||||
"fs_strategy", | ||||||
"fs_persist", | ||||||
"fs_threshold", | ||||||
) | ||||||
|
||||||
|
||||||
# Could use multiple inheritance here | ||||||
class ManagedDataset(h5py.Dataset): | ||||||
""" | ||||||
h5py.Dataset with context manager behavior | ||||||
""" | ||||||
|
||||||
def __enter__(self): | ||||||
return self | ||||||
|
||||||
def __exit__(self, ex_type, ex_value, ex_traceback): | ||||||
self.file.close() | ||||||
|
||||||
|
||||||
class ManagedGroup(h5py.Group): | ||||||
""" | ||||||
h5py.Group with context manager behavior | ||||||
""" | ||||||
|
||||||
def __enter__(self): | ||||||
return self | ||||||
|
||||||
def __exit__(self, ex_type, ex_value, ex_traceback): | ||||||
self.file.close() | ||||||
|
||||||
|
||||||
def partition_h5_kwargs(**kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]: | ||||||
""" | ||||||
partition kwargs into file-creation kwargs and dataset-creation kwargs | ||||||
""" | ||||||
file_kwargs = kwargs.copy() | ||||||
dataset_kwargs = {} | ||||||
for key in H5_DATASET_KWDS: | ||||||
if key in file_kwargs: | ||||||
for key in kwargs: | ||||||
if key in H5_DATASET_KWDS: | ||||||
dataset_kwargs[key] = file_kwargs.pop(key) | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider keeping |
||||||
return file_kwargs, dataset_kwargs | ||||||
|
||||||
|
||||||
def access_h5( | ||||||
store: Pathlike, path: Pathlike, mode: str, **kwargs | ||||||
store: Union[h5py.File, Pathlike], path: Pathlike, **kwargs | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. interesting, how is that different from naming a dataset with the empty string?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For one, providing an empty string results in a `TypeError` (see the second example below), whereas `None` creates the dataset without a name:
In [22]: with h5py.File("test.hdf5", "w") as h5f:
...: h5f.create_dataset(None, data = np.zeros((5,5)))
...: print(list(h5f.keys()))
...:
...:
[]
In [23]: with h5py.File("test.hdf5", "w") as h5f:
...: h5f.create_dataset("", data = np.zeros((5,5)))
...: print(list(h5f.keys()))
...:
...:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-23-5230edf15b52> in <module>
1 with h5py.File("test.hdf5", "w") as h5f:
----> 2 h5f.create_dataset("", data = np.zeros((5,5)))
3 print(list(h5f.keys()))
4
5
~\.julia\conda\3\lib\site-packages\h5py\_hl\group.py in create_dataset(self, name, shape, dtype, data, **kwds)
147 group = self.require_group(parent_path)
148
--> 149 dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
150 dset = dataset.Dataset(dsid)
151 return dset
~\.julia\conda\3\lib\site-packages\h5py\_hl\dataset.py in make_new_dset(parent, shape, dtype, data, name, chunks, compression, shuffle, fletcher32, maxshape, compression_opts, fillvalue, scaleoffset, track_times, external, track_order, dcpl, allow_unknown_filter)
140
141
--> 142 dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl)
143
144 if (data is not None) and (not isinstance(data, Empty)):
h5py\_objects.pyx in h5py._objects.with_phil.wrapper()
h5py\_objects.pyx in h5py._objects.with_phil.wrapper()
h5py\h5d.pyx in h5py.h5d.create()
TypeError: expected bytes, str found
In [24]: with h5py.File("test.hdf5", "w") as h5f:
...: h5f.create_dataset(b"", data = np.zeros((5,5)))
...: print(list(h5f.keys()))
...:
...:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-24-7f1869f34ebe> in <module>
1 with h5py.File("test.hdf5", "w") as h5f:
----> 2 h5f.create_dataset(b"", data = np.zeros((5,5)))
3 print(list(h5f.keys()))
4
5
~\.julia\conda\3\lib\site-packages\h5py\_hl\group.py in create_dataset(self, name, shape, dtype, data, **kwds)
147 group = self.require_group(parent_path)
148
--> 149 dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
150 dset = dataset.Dataset(dsid)
151 return dset
~\.julia\conda\3\lib\site-packages\h5py\_hl\dataset.py in make_new_dset(parent, shape, dtype, data, name, chunks, compression, shuffle, fletcher32, maxshape, compression_opts, fillvalue, scaleoffset, track_times, external, track_order, dcpl, allow_unknown_filter)
140
141
--> 142 dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl)
143
144 if (data is not None) and (not isinstance(data, Empty)):
h5py\_objects.pyx in h5py._objects.with_phil.wrapper()
h5py\_objects.pyx in h5py._objects.with_phil.wrapper()
h5py\h5d.pyx in h5py.h5d.create()
ValueError: Unable to create dataset (no name given) |
||||||
) -> Union[h5py.Dataset, h5py.Group]: | ||||||
""" | ||||||
Docstring | ||||||
""" | ||||||
if mode not in H5_ACCESS_MODES: | ||||||
raise ValueError(f"Invalid access mode. Got {mode}, expected one of {H5_ACCESS_MODES}.") | ||||||
|
||||||
attrs = kwargs.pop("attrs", {}) | ||||||
mode = kwargs.get("mode", "r") | ||||||
file_kwargs, dataset_kwargs = partition_h5_kwargs(**kwargs) | ||||||
|
||||||
h5f = h5py.File(store, mode=mode, **file_kwargs) | ||||||
|
||||||
if mode in ("r", "r+", "a") and (result := h5f.get(path)) is not None: | ||||||
return result | ||||||
if isinstance(store, h5py.File): | ||||||
h5f = store | ||||||
else: | ||||||
h5f = h5py.File(store, **file_kwargs) | ||||||
|
||||||
if mode in ("r", "r+", "a"): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps if path is empty, just return the File, There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we are in |
||||||
# let h5py handle keyerrors | ||||||
result = h5f[path] | ||||||
else: | ||||||
if len(dataset_kwargs) > 0: | ||||||
if 'name' in dataset_kwargs: | ||||||
warnings.warn('"Name" was provided to this function as a keyword argument. This value will be replaced with the second argument to this function.') | ||||||
if "name" in dataset_kwargs: | ||||||
warnings.warn( | ||||||
'"Name" was provided to this function as a keyword argument. This value will be replaced with the second argument to this function.' | ||||||
) | ||||||
dataset_kwargs["name"] = path | ||||||
result = h5f.create_dataset(**dataset_kwargs) | ||||||
else: | ||||||
result = h5f.require_group(path) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Allow |
||||||
|
||||||
result.attrs.update(**attrs) | ||||||
|
||||||
return result | ||||||
if isinstance(result, h5py.Group): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A |
||||||
result = ManagedGroup(result.id) | ||||||
else: | ||||||
result = ManagedDataset(result.id) | ||||||
|
||||||
return result |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from h5py._hl.dataset import make_new_dset | ||
from fibsem_tools.io.h5 import partition_h5_kwargs | ||
from inspect import signature, Parameter | ||
|
||
|
||
def test_kwarg_partition(): | ||
dataset_creation_sig = signature(make_new_dset) | ||
dataset_kwargs = { | ||
k: None | ||
for k, v in filter( | ||
lambda p: p[1].default is not Parameter.empty, | ||
dataset_creation_sig.parameters.items(), | ||
) | ||
} | ||
file_kwargs = {"foo": None, "bar": None} | ||
file_kwargs_out, dataset_kwargs_out = partition_h5_kwargs( | ||
**dataset_kwargs, **file_kwargs | ||
) | ||
assert file_kwargs == file_kwargs_out | ||
assert dataset_kwargs == dataset_kwargs_out | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Test for common keyword arguments like |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since `track_order` is a common keyword to Datasets, Groups, and Files, perhaps this parameter should be passed to all objects when they are created.