Skip to content

Commit

Permalink
FIX: Add fixes to allow reading kerchunk catalog (#485)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mgrover1 authored Jun 26, 2022
1 parent fb5d892 commit 8a3aa79
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 14 deletions.
1 change: 1 addition & 0 deletions intake_esm/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Config:
class DataFormat(str, enum.Enum):
netcdf = 'netcdf'
zarr = 'zarr'
reference = 'reference'

class Config:
validate_all = True
Expand Down
23 changes: 11 additions & 12 deletions intake_esm/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@ class ESMDataSourceError(Exception):
def _get_xarray_open_kwargs(data_format, xarray_open_kwargs=None, storage_options=None):
xarray_open_kwargs = (xarray_open_kwargs or {}).copy()
_default_open_kwargs = {
'engine': 'zarr' if data_format == 'zarr' else 'netcdf4',
'engine': 'zarr' if data_format in {'zarr', 'reference'} else 'netcdf4',
'chunks': {},
'backend_kwargs': {},
}
if not xarray_open_kwargs:
xarray_open_kwargs = _default_open_kwargs
else:
xarray_open_kwargs = {**_default_open_kwargs, **xarray_open_kwargs}
xarray_open_kwargs = (
{**_default_open_kwargs, **xarray_open_kwargs}
if xarray_open_kwargs
else _default_open_kwargs
)

if (
xarray_open_kwargs['engine'] == 'zarr'
and 'storage_options' not in xarray_open_kwargs['backend_kwargs']
Expand All @@ -47,19 +49,16 @@ def _open_dataset(
data_format=None,
):

_can_be_local = fsspec.utils.can_be_local(urlpath)
storage_options = xarray_open_kwargs.get('backend_kwargs', {}).get('storage_options', {})

# Support kerchunk datasets, setting the file object (fo) and urlpath
if data_format == 'reference':
if 'storage_options' not in xarray_open_kwargs.keys():
xarray_open_kwargs['storage_options'] = {}
xarray_open_kwargs['storage_options']['fo'] = urlpath
xarray_open_kwargs['backend_kwargs']['storage_options']['fo'] = urlpath
xarray_open_kwargs['backend_kwargs']['consolidated'] = False
urlpath = 'reference://'

if xarray_open_kwargs['engine'] == 'zarr':
url = urlpath
elif _can_be_local:
elif fsspec.utils.can_be_local(urlpath):
url = fsspec.open_local(urlpath, **storage_options)
else:
url = fsspec.open(urlpath, **storage_options).open()
Expand All @@ -77,6 +76,7 @@ def _open_dataset(

if varname and isinstance(varname, str):
varname = [varname]

if requested_variables:
if isinstance(requested_variables, str):
requested_variables = [requested_variables]
Expand Down Expand Up @@ -214,7 +214,6 @@ def _open_dataset(self):
"""Open dataset with xarray"""

try:

datasets = [
_open_dataset(
record[self.path_column_name],
Expand Down
7 changes: 5 additions & 2 deletions tests/test_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
'sample_data/kerchunk-files/noaa-nwm-test-reference.json',
)

multi_path = os.path.dirname(f1) + '/*.nc'
multi_path = f'{os.path.dirname(f1)}/*.nc'


def _common_open(fpath, varname='tasmax'):
Expand All @@ -44,10 +44,13 @@ def test_get_xarray_open_kwargs(storage_options):


def test_open_dataset_kerchunk(kerchunk_file=kerchunk_file):
xarray_open_kwargs = _get_xarray_open_kwargs(
'reference', dict(engine='zarr', consolidated=False), storage_options={}
)
ds = _open_dataset(
data_format='reference',
urlpath=kerchunk_file,
varname=None,
xarray_open_kwargs=dict(engine='zarr', consolidated=False),
xarray_open_kwargs=xarray_open_kwargs,
).compute()
assert isinstance(ds, xarray.Dataset)

0 comments on commit 8a3aa79

Please sign in to comment.