From db6015d69f0e52e7e0107931fac07990aa3331d1 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Fri, 3 May 2019 16:11:32 -0600 Subject: [PATCH 1/9] Contiguous store with unlim dim bug fix --- xarray/backends/netCDF4_.py | 17 +++++++++++++++-- xarray/tests/test_backends.py | 6 ++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index b26d5575d23..436dd19a103 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -183,7 +183,7 @@ def _force_native_endianness(var): def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, lsd_okay=True, h5py_okay=False, - backend='netCDF4', unlimited_dims=None): + backend='netCDF4', unlimited_dims=None, name=None): if unlimited_dims is None: unlimited_dims = () @@ -210,6 +210,18 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, if chunks_too_big or changed_shape: del encoding['chunksizes'] + var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) + if var_has_unlim_dim: + if 'contiguous' in encoding.keys(): + del encoding['contiguous'] # sufficent for output. + # This does not modify the variable (encoding) itself. + if name is None: name = '(name not supplied)' + warnings.warn( # the gnarly warning message. + "The variable " + name + " contains an unlimited dimension " + "and has encoding['contiguous']=True. This is not possible " + "for netCDF4 and the variable is written to file chunked." + ) + for k in safe_to_drop: if k in encoding: del encoding[k] @@ -444,7 +456,8 @@ def prepare_variable(self, name, variable, check_encoding=False, encoding = _extract_nc4_variable_encoding( variable, raise_on_invalid=check_encoding, - unlimited_dims=unlimited_dims) + unlimited_dims=unlimited_dims, name=name) + if name in self.ds.variables: nc4_var = self.ds.variables[name] else: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5e28ff46665..a7e0da08f5d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3508,6 +3508,12 @@ def test_extract_nc4_variable_encoding(self): encoding = _extract_nc4_variable_encoding(var, raise_on_invalid=True) assert {'shuffle': True} == encoding + # Variables with unlim dims must be chunked on output. + var = xr.Variable(('x',), [1, 2, 3], {}, {'contiguous': True}) + encoding = _extract_nc4_variable_encoding(var, unlimited_dims=('x',)) + assert {} == encoding + + def test_extract_h5nc_encoding(self): # not supported with h5netcdf (yet) var = xr.Variable(('x',), [1, 2, 3], {}, From ffbc9bdcf0e11998ec8c0e9a181f57e47b12dbb0 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Fri, 3 May 2019 17:12:04 -0600 Subject: [PATCH 2/9] It is new: Bug fix to 1849, need chunking for vars with unlim dims in netcdf4. --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 27709a09e7a..f763990e639 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,9 @@ Enhancements Bug fixes ~~~~~~~~~ +- NetCDF4 output: variables with unlimited dimensions must be chunked (not + contiguous)on output. (:issue:`1849`) + By `James McCreight `_. - indexing with an empty list creates an object with zero-length axis (:issue:`2882`) By `Mayeul d'Avezac `_. - Return correct count for scalar datetime64 arrays (:issue:`2770`) From 45d5c4f9bf0ba955407bf7a21b85ca45ac33b53f Mon Sep 17 00:00:00 2001 From: James McCreight Date: Fri, 3 May 2019 17:20:35 -0600 Subject: [PATCH 3/9] pep8 spoke --- xarray/backends/netCDF4_.py | 12 +++++++----- xarray/tests/test_backends.py | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 436dd19a103..3cf430278f8 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -183,7 +183,8 @@ def _force_native_endianness(var): def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, lsd_okay=True, h5py_okay=False, - backend='netCDF4', unlimited_dims=None, name=None): + backend='netCDF4', unlimited_dims=None, + name=None): if unlimited_dims is None: unlimited_dims = () @@ -213,10 +214,11 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) if var_has_unlim_dim: if 'contiguous' in encoding.keys(): - del encoding['contiguous'] # sufficent for output. - # This does not modify the variable (encoding) itself. - if name is None: name = '(name not supplied)' - warnings.warn( # the gnarly warning message. + del encoding['contiguous'] + # The above does not modify the variable (encoding) itself. + if name is None: + name = '(name not supplied)' + warnings.warn( "The variable " + name + " contains an unlimited dimension " "and has encoding['contiguous']=True. This is not possible " "for netCDF4 and the variable is written to file chunked." diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a7e0da08f5d..ce945dbf5ff 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3513,7 +3513,6 @@ def test_extract_nc4_variable_encoding(self): encoding = _extract_nc4_variable_encoding(var, unlimited_dims=('x',)) assert {} == encoding - def test_extract_h5nc_encoding(self): # not supported with h5netcdf (yet) var = xr.Variable(('x',), [1, 2, 3], {}, From 2945b3fbb6c3e39ea2c4f97f349c12bc898770b8 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Mon, 20 May 2019 12:58:35 -0600 Subject: [PATCH 4/9] white space addition --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f763990e639..1a167d46ecf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,7 +29,7 @@ Bug fixes ~~~~~~~~~ - NetCDF4 output: variables with unlimited dimensions must be chunked (not - contiguous)on output. (:issue:`1849`) + contiguous) on output. (:issue:`1849`) By `James McCreight `_. - indexing with an empty list creates an object with zero-length axis (:issue:`2882`) By `Mayeul d'Avezac `_. From 14938cd81a08b677f6019949aeeae713d48bfd3e Mon Sep 17 00:00:00 2001 From: James McCreight Date: Mon, 20 May 2019 14:15:12 -0600 Subject: [PATCH 5/9] rm warning, combine if --- xarray/backends/netCDF4_.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 8ea5a3f0248..e9a9902f0f9 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -183,8 +183,7 @@ def _force_native_endianness(var): def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, lsd_okay=True, h5py_okay=False, - backend='netCDF4', unlimited_dims=None, - name=None): + backend='netCDF4', unlimited_dims=None): if unlimited_dims is None: unlimited_dims = () @@ -212,17 +211,8 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, del encoding['chunksizes'] var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) - if var_has_unlim_dim: - if 'contiguous' in encoding.keys(): - del encoding['contiguous'] - # The above does not modify the variable (encoding) itself. - if name is None: - name = '(name not supplied)' - warnings.warn( - "The variable " + name + " contains an unlimited dimension " - "and has encoding['contiguous']=True. This is not possible " - "for netCDF4 and the variable is written to file chunked." - ) + if var_has_unlim_dim and 'contiguous' in encoding.keys(): + del encoding['contiguous'] for k in safe_to_drop: if k in encoding: @@ -458,7 +448,7 @@ def prepare_variable(self, name, variable, check_encoding=False, encoding = _extract_nc4_variable_encoding( variable, raise_on_invalid=check_encoding, - unlimited_dims=unlimited_dims, name=name) + unlimited_dims=unlimited_dims) if name in self.ds.variables: nc4_var = self.ds.variables[name] From 5517731b4cd55abca2ed84ff8ee2e96dec7300f8 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Mon, 3 Jun 2019 12:33:33 -0600 Subject: [PATCH 6/9] check invalid encoding --- xarray/backends/netCDF4_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index e9a9902f0f9..e9f07e41985 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -211,7 +211,7 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, del encoding['chunksizes'] var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) - if var_has_unlim_dim and 'contiguous' in encoding.keys(): + if not raise_on_invalid and var_has_unlim_dim and 'contiguous' in encoding.keys(): del encoding['contiguous'] for k in safe_to_drop: From e82b7ddaf03945db84c13d2d5c648134a41ab867 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Mon, 3 Jun 2019 16:46:08 -0600 Subject: [PATCH 7/9] pep8 strikes --- xarray/backends/netCDF4_.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index e9f07e41985..7dd6080cd27 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -211,7 +211,8 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, del encoding['chunksizes'] var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) - if not raise_on_invalid and var_has_unlim_dim and 'contiguous' in encoding.keys(): + if not raise_on_invalid and \ + var_has_unlim_dim and 'contiguous' in encoding.keys(): del encoding['contiguous'] for k in safe_to_drop: From ab7d7260f2b11c9b902b504cd2e86dfeb8d07b15 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 4 Jun 2019 08:49:13 -0600 Subject: [PATCH 8/9] Update netCDF4_.py --- xarray/backends/netCDF4_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 7dd6080cd27..3910298a9f3 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -211,8 +211,8 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, del encoding['chunksizes'] var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) - if not raise_on_invalid and \ - var_has_unlim_dim and 'contiguous' in encoding.keys(): + if (not raise_on_invalid and + var_has_unlim_dim and 'contiguous' in encoding.keys()): del encoding['contiguous'] for k in safe_to_drop: From 9f2fcda6693bea0adbf2f94ab57605a88da060d0 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 4 Jun 2019 09:16:01 -0700 Subject: [PATCH 9/9] Line wrapping --- xarray/backends/netCDF4_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 3910298a9f3..e411fd3a80e 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -211,8 +211,8 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, del encoding['chunksizes'] var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims) - if (not raise_on_invalid and - var_has_unlim_dim and 'contiguous' in encoding.keys()): + if (not raise_on_invalid and var_has_unlim_dim + and 'contiguous' in encoding.keys()): del encoding['contiguous'] for k in safe_to_drop: