Skip to content

Commit

Permalink
Handle legacy nested datasets (with tests) (#850)
Browse files Browse the repository at this point in the history
* Fix #840

* Add legacy tests (See #840)

Each fixture (flat & nested) should have a version
which does not include any new metadata.

* Fix PEP8 issues

* Try dropping editable installs

see: pypa/pip#10573

* Handle case of store being a dict

* Exclude unexpected failure from code coverage

* Add 2.10.2 release notes
  • Loading branch information
joshmoore authored Oct 19, 2021
1 parent 4f8cb35 commit d1dc987
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/minimal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ jobs:
shell: "bash -l {0}"
run: |
conda activate minimal
python -m pip install -e .
python -m pip install .
pytest -svx
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install -U pip setuptools wheel codecov line_profiler
python -m pip install -rrequirements_dev_minimal.txt numpy${{ matrix.numpy_version}} -rrequirements_dev_optional.txt pymongo redis
python -m pip install -e .
python -m pip install .
python -m pip freeze
- name: Tests
shell: "bash -l {0}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/windows-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install -U pip setuptools wheel
python -m pip install -r requirements_dev_numpy.txt -r requirements_dev_minimal.txt -r requirements_dev_optional.txt
python -m pip install -e .
python -m pip install .
python -m pip freeze
npm install -g azurite
- name: Run Tests
Expand Down
11 changes: 11 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,17 @@ Release notes
Unreleased
----------

.. _release_2.10.2:

2.10.2
------

Bug fixes
~~~~~~~~~

* Fix NestedDirectoryStore datasets without dimension_separator metadata.
By :user:`Josh Moore <joshmoore>`; :issue:`850`.

.. _release_2.10.1:

2.10.1
Expand Down
3 changes: 2 additions & 1 deletion fixture/flat/.zarray
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"id": "blosc",
"shuffle": 1
},
"dimension_separator": ".",
"dtype": "<i8",
"fill_value": 0,
"filters": null,
Expand All @@ -19,4 +20,4 @@
2
],
"zarr_format": 2
}
}
22 changes: 22 additions & 0 deletions fixture/flat_legacy/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"chunks": [
2,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i8",
"fill_value": 0,
"filters": null,
"order": "C",
"shape": [
2,
2
],
"zarr_format": 2
}
Binary file added fixture/flat_legacy/0.0
Binary file not shown.
22 changes: 22 additions & 0 deletions fixture/nested_legacy/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"chunks": [
2,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i8",
"fill_value": 0,
"filters": null,
"order": "C",
"shape": [
2,
2
],
"zarr_format": 2
}
Binary file added fixture/nested_legacy/0/0
Binary file not shown.
13 changes: 12 additions & 1 deletion zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,18 @@ def _load_metadata_nosync(self):
self._dtype = meta['dtype']
self._fill_value = meta['fill_value']
self._order = meta['order']
self._dimension_separator = meta.get('dimension_separator', '.')

dimension_separator = meta.get('dimension_separator', None)
if dimension_separator is None:
try:
dimension_separator = self._store._dimension_separator
except (AttributeError, KeyError):
pass

# Fallback for any stores which do not choose a default
if dimension_separator is None:
dimension_separator = "."
self._dimension_separator = dimension_separator

# setup compressor
config = meta['compressor']
Expand Down
4 changes: 3 additions & 1 deletion zarr/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
else:
object_codec = None

dimension_separator = meta.get('dimension_separator', None)
fill_value = decode_fill_value(meta['fill_value'], dtype, object_codec)
meta = dict(
zarr_format=meta['zarr_format'],
Expand All @@ -57,8 +58,9 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
fill_value=fill_value,
order=meta['order'],
filters=meta['filters'],
dimension_separator=meta.get('dimension_separator', '.'),
)
if dimension_separator:
meta['dimension_separator'] = dimension_separator

except Exception as e:
raise MetadataError('error decoding metadata: %s' % e)
Expand Down
67 changes: 53 additions & 14 deletions zarr/tests/test_dim_separator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
needs_fsspec = pytest.mark.skipif(not have_fsspec, reason="needs fsspec")


@pytest.fixture(params=("static_nested",
"static_flat",
@pytest.fixture(params=("static_flat",
"static_flat_legacy",
"static_nested",
"static_nested_legacy",
"directory_nested",
"directory_flat",
"directory_default",
Expand All @@ -35,14 +37,16 @@ def dataset(tmpdir, request):

if which.startswith("static"):
project_root = pathlib.Path(zarr.__file__).resolve().parent.parent
if which.endswith("nested"):
static = project_root / "fixture/nested"
generator = NestedDirectoryStore
else:
static = project_root / "fixture/flat"
generator = DirectoryStore
suffix = which[len("static_"):]
static = project_root / "fixture" / suffix

if not static.exists(): # pragma: no cover

if "nested" in which:
generator = NestedDirectoryStore
else:
generator = DirectoryStore

# store the data - should be one-time operation
s = generator(str(static))
a = zarr.open(store=s, mode="w", shape=(2, 2), dtype="<i8")
Expand All @@ -69,22 +73,57 @@ def dataset(tmpdir, request):
return str(loc)


def verify(array):
assert_array_equal(array[:], [[1, 2], [3, 4]])
def verify(array, expect_failure=False):
try:
assert_array_equal(array[:], [[1, 2], [3, 4]])
except AssertionError:
if expect_failure:
pytest.xfail()
else:
raise # pragma: no cover


def test_open(dataset):
verify(zarr.open(dataset, "r"))
"""
Use zarr.open to open the dataset fixture. Legacy nested datatsets
without the dimension_separator metadata are not expected to be
openable.
"""
failure = "nested_legacy" in dataset
verify(zarr.open(dataset, "r"), failure)


@needs_fsspec
def test_fsstore(dataset):
verify(Array(store=FSStore(dataset)))
"""
Use FSStore to open the dataset fixture. Legacy nested datatsets
without the dimension_separator metadata are not expected to be
openable.
"""
failure = "nested_legacy" in dataset
verify(Array(store=FSStore(dataset)), failure)


def test_directory(dataset):
verify(zarr.Array(store=DirectoryStore(dataset)))
"""
Use DirectoryStore to open the dataset fixture. Legacy nested datatsets
without the dimension_separator metadata are not expected to be
openable.
"""
failure = "nested_legacy" in dataset
verify(zarr.Array(store=DirectoryStore(dataset)), failure)


def test_nested(dataset):
verify(Array(store=NestedDirectoryStore(dataset)))
"""
Use NestedDirectoryStore to open the dataset fixture. This is the only
method that is expected to successfully open legacy nested datasets
without the dimension_separator metadata. However, for none-Nested
datasets without any metadata, NestedDirectoryStore will fail.
"""
failure = (
"flat_legacy" in dataset or
"directory_default" in dataset or
"fs_default" in dataset
)
verify(Array(store=NestedDirectoryStore(dataset)), failure)

0 comments on commit d1dc987

Please sign in to comment.