Skip to content

Commit

Permalink
BF(workaround): loop through namespaces while validating nwb
Browse files Browse the repository at this point in the history
To overcome problems like the one presented in dandi/helpdesk#43,
this introduces the solution proposed by @orugbel in #917 (comment).

Unfortunately, there has been no release of pynwb with that function yet, so we
are doomed to duplicate the code and do it "manually" here for now.

Closes #917
  • Loading branch information
yarikoptic committed Jun 29, 2022
1 parent 4d9e548 commit eaeb8c9
Showing 1 changed file with 71 additions and 2 deletions.
73 changes: 71 additions & 2 deletions dandi/pynwb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,76 @@ def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None:
container.external_file[no] = str(name_new)


# borrowed from
# https://github.com/NeurodataWithoutBorders/pynwb/blob/745aaf26fa56958254e1d22a73d4c962c8074332/src/pynwb/validate.py#L29
# which is part of the https://github.com/NeurodataWithoutBorders/pynwb/pull/1432
# and needed to overcome errors like in https://github.com/dandi/helpdesk/discussions/43
def get_cached_namespaces_to_validate(path):
    """
    Select the cached namespace(s) of an NWB file that validation should target.

    The namespaces cached in the file are loaded into a fresh catalog. Every
    namespace that is listed as a dependency of another cached namespace is
    then dropped, which leaves only the most specific ones (i.e., extensions).
    ``hdmf-experimental`` is always excluded (see the TODO in the body).

    Example
    -------
    Validating a file against the namespaces cached in it, which is useful,
    e.g., when the file was created using an extension::

        from pynwb import NWBHDF5IO, validate

        path = "my_nwb_file.nwb"
        validate_namespaces, manager, cached_namespaces = (
            get_cached_namespaces_to_validate(path)
        )
        with NWBHDF5IO(path, "r", manager=manager) as reader:
            errors = []
            for ns in validate_namespaces:
                errors += validate(io=reader, namespace=ns)

    :param path: Path for the NWB file
    :return: Tuple with:
        - Sorted list of strings with the most specific namespace(s) to use
          for validation.
        - BuildManager object for opening the file for validation, or None
          when no namespace was selected.
        - Dict with the full result from NWBHDF5IO.load_namespaces
    """
    from hdmf.build import BuildManager, TypeMap
    from hdmf.spec import NamespaceCatalog
    from pynwb.spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace

    catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
    ns_deps = NWBHDF5IO.load_namespaces(catalog, path)

    # Start from every cached namespace and strike out each one that some
    # other namespace depends on; what survives are the most specific ones.
    candidates = set(ns_deps)
    for dependencies in ns_deps.values():
        candidates.difference_update(dependencies.keys())

    # TODO remove this workaround for issue
    # https://github.com/NeurodataWithoutBorders/pynwb/issues/1357
    # remove validation of hdmf-experimental for now
    candidates.discard("hdmf-experimental")

    namespaces = sorted(candidates)

    # A build manager is only created when there is something to validate.
    manager = BuildManager(TypeMap(catalog)) if namespaces else None

    return namespaces, manager, ns_deps


def validate_namespaces(path: Union[str, Path]):
    """Run pynwb validation separately for each validatable cached namespace.

    Proposed by @orugbel in
    https://github.com/dandi/dandi-cli/issues/917#issuecomment-1045154252

    :param path: Path of the NWB file to validate.
    :return: List with the errors reported by ``pynwb.validate``, accumulated
        over all namespaces returned by ``get_cached_namespaces_to_validate``;
        empty when no namespace was selected or no error was reported.
    """
    # Coerce to str, as this module's ``validate`` does before calling here.
    # NOTE(review): presumably some pynwb/hdmf versions only accept str paths.
    path = str(path)
    namespaces_validate, manager, _ = get_cached_namespaces_to_validate(path)
    errors = []
    with NWBHDF5IO(path, "r", manager=manager) as reader:
        for ns in namespaces_validate:
            # Must be the qualified ``pynwb.validate``: the bare name
            # ``validate`` resolves to this module's own
            # ``validate(path, devel_debug)``, which does not accept
            # ``io``/``namespace`` keyword arguments.
            errors += pynwb.validate(io=reader, namespace=ns)
    return errors


@validate_cache.memoize_path
def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]:
"""Run validation on a file and return errors
Expand All @@ -333,8 +403,7 @@ def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]:
path = str(path) # Might come in as pathlib's PATH
errors: List[str]
try:
with pynwb.NWBHDF5IO(path, "r", load_namespaces=True) as reader:
errors = pynwb.validate(reader)
errors = validate_namespaces(path)
lgr.warning(
"pynwb validation errors for %s: %s",
path,
Expand Down

0 comments on commit eaeb8c9

Please sign in to comment.