Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Contiguous store with unlim dim bug fix #2941

Merged
merged 10 commits into from
Jun 4, 2019
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ Enhancements
Bug fixes
~~~~~~~~~

- NetCDF4 output: variables with unlimited dimensions are now always written
chunked (not contiguous); a conflicting ``contiguous`` encoding is ignored
and a warning is issued. (:issue:`1849`)
By `James McCreight <https://github.com/jmccreight>`_.
- Indexing with an empty list creates an object with a zero-length axis (:issue:`2882`)
By `Mayeul d'Avezac <https://github.com/mdavezac>`_.
- Return correct count for scalar datetime64 arrays (:issue:`2770`)
Expand Down
19 changes: 17 additions & 2 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ def _force_native_endianness(var):

def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
lsd_okay=True, h5py_okay=False,
backend='netCDF4', unlimited_dims=None):
backend='netCDF4', unlimited_dims=None,
name=None):
if unlimited_dims is None:
unlimited_dims = ()

Expand All @@ -210,6 +211,19 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
if chunks_too_big or changed_shape:
del encoding['chunksizes']

var_has_unlim_dim = any(dim in unlimited_dims for dim in variable.dims)
if var_has_unlim_dim:
jmccreight marked this conversation as resolved.
Show resolved Hide resolved
if 'contiguous' in encoding.keys():
del encoding['contiguous']
# Deleting the key here does not modify the variable's own encoding.
if name is None:
jmccreight marked this conversation as resolved.
Show resolved Hide resolved
name = '(name not supplied)'
warnings.warn(
"The variable " + name + " contains an unlimited dimension "
"and has encoding['contiguous']=True. This is not possible "
"for netCDF4 and the variable is written to file chunked."
)

for k in safe_to_drop:
if k in encoding:
del encoding[k]
Expand Down Expand Up @@ -444,7 +458,8 @@ def prepare_variable(self, name, variable, check_encoding=False,

encoding = _extract_nc4_variable_encoding(
variable, raise_on_invalid=check_encoding,
unlimited_dims=unlimited_dims)
unlimited_dims=unlimited_dims, name=name)
jmccreight marked this conversation as resolved.
Show resolved Hide resolved

if name in self.ds.variables:
nc4_var = self.ds.variables[name]
else:
Expand Down
5 changes: 5 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3525,6 +3525,11 @@ def test_extract_nc4_variable_encoding(self):
encoding = _extract_nc4_variable_encoding(var, raise_on_invalid=True)
assert {'shuffle': True} == encoding

# Variables with unlimited dimensions must be chunked on output.
var = xr.Variable(('x',), [1, 2, 3], {}, {'contiguous': True})
encoding = _extract_nc4_variable_encoding(var, unlimited_dims=('x',))
assert {} == encoding

def test_extract_h5nc_encoding(self):
# not supported with h5netcdf (yet)
var = xr.Variable(('x',), [1, 2, 3], {},
Expand Down