Skip to content

Commit

Permalink
Merge pull request #1104 from TheChymera/ngff
Browse files Browse the repository at this point in the history
[ENH] NGFF format support
  • Loading branch information
effigies authored Jul 7, 2022
2 parents 0749755 + 988bd2d commit f2e34d4
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 21 deletions.
11 changes: 5 additions & 6 deletions src/04-modality-specific-files/10-microscopy.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ by the [Open Microscopy Environment](https://www.openmicroscopy.org/) for whole-
the [OME-TIFF file specifications](https://docs.openmicroscopy.org/ome-model/6.1.2/ome-tiff/file-structure.html).
The OME-TIFF file allows for multi-page TIFF files to store multiple image planes and supports
multi-resolution pyramidal tiled images. An OME-XML data block is also embedded inside the
file’s header.
file’s header. Further, OME-ZARR (sometimes referred to as OME-NGFF or NGFF) has been developed to provide improved
access and storage for large data via chunked and compressed N-dimensional arrays.

The BIDS standard accepts microscopy data in a number of file formats to accommodate datasets
stored in 2D image formats and whole-slide imaging formats, to accommodate lossless and lossy
Expand All @@ -54,12 +55,10 @@ Microscopy raw data MUST be stored in one of the following formats:
(`.ome.tif` for standard TIFF files or `.ome.btf` for
[BigTIFF](https://www.awaresystems.be/imaging/tiff/bigtiff.html) files)

If different from PNG, TIFF or OME-TIFF, the original unprocessed data in the native format MAY be
stored in the [`/sourcedata` directory](../02-common-principles.md#source-vs-raw-vs-derived-data).
- [OME-ZARR/NGFF](https://ngff.openmicroscopy.org/latest/) (`.ome.zarr` directories)

Future versions may extend this list of supported file formats, for example with the
Next-Generation File Formats currently developed by OME ([OME-NGFF](https://ngff.openmicroscopy.org/latest/))
as a successor to OME-TIFF for better remote sharing of large datasets.
If different from PNG, TIFF, OME-TIFF, or OME-ZARR, the original unprocessed data in the native format MAY be
stored in the [`/sourcedata` directory](../02-common-principles.md#source-vs-raw-vs-derived-data).

### Modality suffixes
Microscopy data currently support the following imaging modalities:
Expand Down
7 changes: 7 additions & 0 deletions src/schema/objects/extensions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@
Used by KIT, Yokogawa, and Ricoh MEG systems.
Successor to the `.sqd` extension for marker files.
.ome.zarr/:
name: OME Next Generation File Format
description: |
An OME-NGFF file.
OME-NGFF is a [Zarr](https://zarr.readthedocs.io)-based format, organizing data arrays in nested directories.
This format was developed by the Open Microscopy Environment to provide streaming access to very large datasets.
.nii:
name: NIfTI
description: |
Expand Down
1 change: 1 addition & 0 deletions src/schema/rules/datatypes/micr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ microscopy:
extensions:
- .ome.tif
- .ome.btf
- .ome.zarr/
- .png
- .tif
- .json
Expand Down
2 changes: 1 addition & 1 deletion tools/schemacode/schemacode/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def test_load_all():
os.path.abspath(os.path.dirname(__file__)),
"../data/schema",
)
schema_all = load_all(schema_path)
schema_all, _ = load_all(schema_path)

# Check if expected keys are present in all entries
for entry in schema_all:
Expand Down
68 changes: 54 additions & 14 deletions tools/schemacode/schemacode/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
DIR_ENTITIES = ["subject", "session"]


def _get_paths(bids_paths):
def _get_paths(
bids_paths,
pseudofile_suffixes=[],
):
"""
Get all paths from a list of directories, excluding hidden subdirectories from distribution.
Expand All @@ -25,6 +28,9 @@ def _get_paths(bids_paths):
bids_paths : list or str
Directories from which to get paths, may also contain file paths, which will remain
unchanged.
pseudofile_suffixes : list of str
Directory suffixes prompting the validation of the directory name and limiting further
directory walk.
Notes
-----
Expand All @@ -47,23 +53,19 @@ def _get_paths(bids_paths):
".bidsignore",
"dandiset.yaml",
]
# Inelegant hard-coded solution.
# Could be replaced by a maximum depth limit if BIDS root auto-detection is implemented.
treat_as_file_suffix = [".ngff"]

path_list = []
for bids_path in bids_paths:
bids_path = os.path.abspath(os.path.expanduser(bids_path))
if os.path.isfile(bids_path):
path_list.append(bids_path)
continue
for root, dirs, file_names in os.walk(bids_path, topdown=False):
if any(root.endswith(i) for i in treat_as_file_suffix):
continue
if any(f"{i}/" in root for i in treat_as_file_suffix):
continue
if any(f"{i}\\" in root for i in treat_as_file_suffix):
continue
for root, dirs, file_names in os.walk(bids_path, topdown=True):
if any(root.endswith(i) for i in pseudofile_suffixes):
# Add the directory name to the validation paths list.
path_list.append(f"{root}/")
# Do not index the contents of the directory.
dirs[:] = []
# will break if BIDS ever puts meaningful data under `/.{dandi,datalad,git}*/`
if any(exclude_subdir in root for exclude_subdir in exclude_subdirs):
continue
Expand Down Expand Up @@ -335,6 +337,8 @@ def load_all(
-------
all_regex : list of dict
A list of dictionaries, with keys including 'regex' and 'mandatory'.
my_schema : dict
Nested dictionaries representing the full schema.
"""

my_schema = schema.load_schema(schema_dir)
Expand All @@ -346,13 +350,14 @@ def load_all(
)
all_regex.extend(top_level_regex)

return all_regex
return all_regex, my_schema


def validate_all(
bids_paths,
regex_schema,
debug=False,
pseudofile_suffixes=[],
):
"""
Validate `bids_paths` based on a `regex_schema` dictionary list, including regexes.
Expand All @@ -366,6 +371,11 @@ def validate_all(
debug : bool, optional
Whether to print itemwise notices for checks on the console, and include them in the
validation result.
pseudofile_suffixes : list of str, optional
Any suffixes which identify BIDS-valid directory data.
These pseudo-file suffixes will be validated based on the directory name, with the
directory contents not being indexed for validation.
By default, no pseudo-file suffixes are checked.
Returns
-------
Expand All @@ -384,7 +394,7 @@ def validate_all(
"""

tracking_schema = deepcopy(regex_schema)
paths_list = _get_paths(bids_paths)
paths_list = _get_paths(bids_paths, pseudofile_suffixes=pseudofile_suffixes)
tracking_paths = deepcopy(paths_list)
if debug:
itemwise_results = []
Expand Down Expand Up @@ -658,6 +668,34 @@ def log_errors(validation_result):
lgr.warning("The `%s` file was not matched by any regex schema entry.", i)


def _get_directory_suffixes(my_schema):
"""Query schema for suffixes which identify directory entities.
Parameters
----------
my_schema : dict
Nested directory as produced by `schemacode.schema.load_schema()`.
Returns
-------
list of str
Directory pseudofile suffixes excluding trailing slashes.
Notes
-----
* Yes this seems super-awkward to do explicitly, after all, the trailing slash is
already in so it should automagically work, but no:
- Subdirectory names need to be dynamically excluded from validation input.
- Backslash directory delimiters are still in use, which is regrettable.
"""
pseudofile_suffixes = []
for i in my_schema["objects"]["extensions"]:
if i.endswith("/"):
if i != "/":
pseudofile_suffixes.append(i[:-1])
return pseudofile_suffixes


def validate_bids(
bids_paths,
schema_reference_root="/usr/share/bids-schema/",
Expand Down Expand Up @@ -716,11 +754,13 @@ def validate_bids(
bids_paths = [bids_paths]

bids_schema_dir = select_schema_dir(bids_paths, schema_reference_root, schema_version)
regex_schema = load_all(bids_schema_dir)
regex_schema, my_schema = load_all(bids_schema_dir)
pseudofile_suffixes = _get_directory_suffixes(my_schema)
validation_result = validate_all(
bids_paths,
regex_schema,
debug=debug,
pseudofile_suffixes=pseudofile_suffixes,
)

log_errors(validation_result)
Expand Down

0 comments on commit f2e34d4

Please sign in to comment.