Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added encoding options for netCDF4 variables #50

Merged
merged 1 commit into from
Mar 7, 2014
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions src/xray/backends.py
Original file line number Diff line number Diff line change
@@ -151,7 +151,23 @@ def convert_variable(var):
data = np.asscalar(var[...])
attributes = OrderedDict((k, var.getncattr(k))
for k in var.ncattrs())
return xarray.XArray(dimensions, data, attributes,
# netCDF4 specific encoding; save _FillValue for later
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to throw warnings across the board for those encoding variables found but not expected by the given data store?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could do that, but are we sure that's a good thing? I can see that popping up a lot of warnings when serializing a dataset from netCDF4 to netCDF3, for example...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that unreasonable? Maybe you're right. If at some point it's
worthwhile to have different logging levels as part of the package, this is
something that could be brought back in.

On Wed, Mar 5, 2014 at 4:36 PM, Stephan Hoyer <notifications@github.com> wrote:

In src/xray/backends.py:

@@ -151,7 +151,23 @@ def convert_variable(var):
data = np.asscalar(var[...])
attributes = OrderedDict((k, var.getncattr(k))
for k in var.ncattrs())

  •        return xarray.XArray(dimensions, data, attributes,
    
  •        # netCDF4 specific encoding; save _FillValue for later
    

We could do that, but are we sure that's a good thing? I can see that
popping up a lot of warnings when serializing a dataset from netCDF4 to
netCDF3, for example...

Reply to this email directly or view it on GitHub:
https://github.com/akleeman/xray/pull/50/files#r10326856

encoding = {}
filters = var.filters()
if filters is not None:
encoding.update(filters)
chunking = var.chunking()
if chunking is not None:
if chunking == 'contiguous':
encoding['contiguous'] = True
encoding['chunksizes'] = None
else:
encoding['contiguous'] = False
encoding['chunksizes'] = tuple(chunking)
encoding['endian'] = var.endian()
encoding['least_significant_digit'] = \
attributes.pop('least_significant_digit', None)
return xarray.XArray(dimensions, data, attributes, encoding,
indexing_mode='orthogonal')
return FrozenOrderedDict((k, convert_variable(v))
for k, v in self.ds.variables.iteritems())
@@ -180,10 +196,20 @@ def set_variable(self, name, variable):
# we let the package handle the _FillValue attribute
# instead of setting it ourselves.
fill_value = variable.attributes.pop('_FillValue', None)
self.ds.createVariable(varname=name,
datatype=variable.dtype,
dimensions=variable.dimensions,
fill_value=fill_value)
encoding = variable.encoding
self.ds.createVariable(
varname=name,
datatype=variable.dtype,
dimensions=variable.dimensions,
zlib=encoding.get('zlib', False),
complevel=encoding.get('complevel', 4),
shuffle=encoding.get('shuffle', True),
fletcher32=encoding.get('fletcher32', False),
contiguous=encoding.get('contiguous', False),
chunksizes=encoding.get('chunksizes'),
endian=encoding.get('endian', 'native'),
least_significant_digit=encoding.get('least_significant_digit'),
fill_value=fill_value)
nc4_var = self.ds.variables[name]
nc4_var.set_auto_maskandscale(False)
nc4_var[:] = variable.data[:]
13 changes: 12 additions & 1 deletion test/test_dataset.py
Original file line number Diff line number Diff line change
@@ -508,7 +508,9 @@ def test_open_encodings(self):
actual = open_dataset(tmp_file)

self.assertXArrayEqual(actual['time'], expected['time'])
self.assertDictEqual(actual['time'].encoding, expected['time'].encoding)
actual_encoding = {k: v for k, v in actual['time'].encoding.iteritems()
if k in expected['time'].encoding}
self.assertDictEqual(actual_encoding, expected['time'].encoding)

os.remove(tmp_file)

@@ -539,6 +541,15 @@ def test_dump_and_open_encodings(self):
ds.close()
os.remove(tmp_file)

def test_compression_encoding(self):
    """Roundtrip a dataset and check that 'var2' keeps its encoding entries.

    'var2' is given zlib, chunksizes and least_significant_digit settings
    before the roundtrip; afterwards every entry of the original encoding
    is compared with the matching entry on the roundtripped variable.
    """
    data = create_test_data()
    compression_settings = {'zlib': True,
                            'chunksizes': (10, 10),
                            'least_significant_digit': 2}
    data['var2'].encoding.update(compression_settings)
    actual = self.roundtrip(data)
    # Only keys present on the original are checked, so extra encoding
    # entries on the roundtripped variable do not fail the test.
    for key, expected in data['var2'].encoding.iteritems():
        self.assertEqual(expected, actual['var2'].encoding[key])

def test_mask_and_scale(self):
f, tmp_file = tempfile.mkstemp(suffix='.nc')
os.close(f)