diff --git a/xwrf/accessors.py b/xwrf/accessors.py
index 8fd110c4..d1711bd3 100644
--- a/xwrf/accessors.py
+++ b/xwrf/accessors.py
@@ -6,7 +6,7 @@
     _collapse_time_dim,
     _decode_times,
     _modify_attrs_to_cf,
-    _remove_units_from_bool_arrays,
+    _remove_invalid_units,
 )
 
 
@@ -41,7 +41,7 @@ def postprocess(self, decode_times=True) -> xr.Dataset:
         """
         ds = (
             self.xarray_obj.pipe(_modify_attrs_to_cf)
-            .pipe(_remove_units_from_bool_arrays)
+            .pipe(_remove_invalid_units)
             .pipe(_collapse_time_dim)
         )
         if decode_times:
diff --git a/xwrf/config.yaml b/xwrf/config.yaml
index b058b950..92b53abf 100644
--- a/xwrf/config.yaml
+++ b/xwrf/config.yaml
@@ -22,9 +22,13 @@ time_coords:
   - Time
   - time
 
-boolean_units_attrs:
+invalid_units_attrs:
   - '-'
   - flag
+  - '0/1 Flag'
+  - 'whoknows'
+  - 'category'
+  - 'none'
 
 cf_attribute_map:
   ZNW:
@@ -92,3 +96,21 @@ cf_attribute_map:
     units: Pa
   ST:
     units: kelvin
+  SM100255:
+    units: dimensionless
+  SM028100:
+    units: dimensionless
+  SM007028:
+    units: dimensionless
+  SM000007:
+    units: dimensionless
+  SCB_DOM:
+    units: dimensionless
+  COSALPHA_V:
+    units: dimensionless
+  GREENFRAC:
+    units: dimensionless
+  SOILTEMP:
+    units: kelvin
+  RH:
+    units: '%'
diff --git a/xwrf/io_plugin.py b/xwrf/io_plugin.py
deleted file mode 100644
index 7053da1b..00000000
--- a/xwrf/io_plugin.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import itertools
-import os
-import pathlib
-import re
-import warnings
-
-import pandas as pd
-import xarray as xr
-
-from .config import config
-
-_LAT_COORDS = ('XLAT', 'XLAT_M', 'XLAT_U', 'XLAT_V', 'CLAT', 'XLAT_C')
-
-_LON_COORDS = ('XLONG', 'XLONG_M', 'XLONG_U', 'XLONG_V', 'CLONG', 'XLONG_C')
-
-_TIME_COORD_VARS = ('XTIME', 'Times', 'Time', 'time')
-
-_ALL_COORDS = set(itertools.chain(*[_LAT_COORDS, _LON_COORDS, _TIME_COORD_VARS]))
-
-_BOOLEAN_UNITS_ATTRS = ('-', 'flag', '0/1 Flag')
-
-
-def is_remote_uri(path: str) -> bool:
-    """Finds URLs of the form protocol:// or protocol::
-    This also matches for http[s]://, which were the only remote URLs
-    supported in <=v0.16.2.
-    """
-    return bool(re.search(r'^[a-z][a-z0-9]*(\://|\:\:)', path))
-
-
-def _normalize_path(path):
-    if isinstance(path, pathlib.Path):
-        path = str(path)
-
-    if isinstance(path, str) and not is_remote_uri(path):
-        path = os.path.abspath(os.path.expanduser(path))
-
-    return path
-
-
-def clean(dataset):
-    """
-    Clean up the dataset.
- """ - coords = set(dataset.variables).intersection(_ALL_COORDS) - dataset = dataset.set_coords(coords) - for coord in dataset.coords: - attrs = dataset[coord].attrs - encoding = dataset[coord].encoding - if coord in _TIME_COORD_VARS: - try: - dataset[coord].data = pd.to_datetime( - list(map(lambda x: x.decode('utf-8'), dataset[coord].data.tolist())), - format='%Y-%m-%d_%H:%M:%S', - ) - except: - warnings.warn(f'Failed to parse time coordinate: {coord}', stacklevel=2) - - elif coord in (_LON_COORDS + _LAT_COORDS) and dataset[coord].ndim == 3: - - attrs = dataset[coord].attrs - encoding = dataset[coord].encoding - dataset = dataset.assign_coords( - {coord: (dataset[coord].dims[1:], dataset[coord].data[0, :, :])} - ) - dataset[coord].attrs = attrs - dataset[coord].encoding = encoding - - return dataset - - -def make_units_quantify_ready(dataset): - for var in dataset.data_vars: - if dataset[var].attrs.get('units') in _BOOLEAN_UNITS_ATTRS: - dataset[var].attrs.pop('units', None) - - -def modify_attrs_to_cf(dataset): - vars_to_update = set(config.get('cf_attribute_map').keys()).intersection(set(dataset.keys())) - - for var in vars_to_update: - dataset[var].attrs.update(config.get(f'cf_attribute_map.{var}')) - - -class WRFBackendEntrypoint(xr.backends.BackendEntrypoint): - def open_dataset( - self, - filename_or_obj, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - group=None, - mode='r', - format='NETCDF4', - clobber=True, - diskless=False, - persist=False, - lock=None, - autoclose=False, - ): - - filename_or_obj = _normalize_path(filename_or_obj) - store = xr.backends.NetCDF4DataStore.open( - filename_or_obj, - mode=mode, - format=format, - clobber=clobber, - diskless=diskless, - persist=persist, - lock=lock, - autoclose=autoclose, - ) - - store_entrypoint = xr.backends.store.StoreBackendEntrypoint() - - with xr.core.utils.close_on_error(store): - dataset = store_entrypoint.open_dataset( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - - make_units_quantify_ready(dataset) - modify_attrs_to_cf(dataset) - return clean(dataset) diff --git a/xwrf/postprocess.py b/xwrf/postprocess.py index 2efa68c0..26853e26 100644 --- a/xwrf/postprocess.py +++ b/xwrf/postprocess.py @@ -10,21 +10,24 @@ def _decode_times(ds: xr.Dataset) -> xr.Dataset: """ Decode the time variable to datetime64. """ + try: + _time = pd.to_datetime(ds.Times.data.astype('str'), errors='raise', format='%Y-%m-%d_%H:%M:%S') + except ValueError: + _time = pd.to_datetime(ds.Times.data.astype('str'), errors='raise', format='%Y-%m-%dT%H:%M:%S.%f') ds = ds.assign_coords( { - 'Time': pd.to_datetime( - ds.Times.data.astype('str'), errors='raise', format='%Y-%m-%d_%H:%M:%S' - ) + 'Time': _time } ) ds.Time.attrs = {'long_name': 'Time', 'standard_name': 'time'} return ds -def _remove_units_from_bool_arrays(ds: xr.Dataset) -> xr.Dataset: - boolean_units_attrs = config.get('boolean_units_attrs') +def _remove_invalid_units(ds: xr.Dataset) -> xr.Dataset: + invalid_units_attrs = config.get('invalid_units_attrs') + print(invalid_units_attrs) for variable in ds.data_vars: - if ds[variable].attrs.get('units') in boolean_units_attrs: + if ds[variable].attrs.get('units') in invalid_units_attrs: ds[variable].attrs.pop('units', None) return ds