From 17bdded94b21adc35bbdbd85522a1295500c0a11 Mon Sep 17 00:00:00 2001 From: Edouard Goudenhoofdt Date: Tue, 28 Jan 2025 15:00:32 +0000 Subject: [PATCH] ADD: function to select dataset variables in sweep ADD: function to get dataset variables in sweep FIX: typo in accessors module: Dataarray -> Dataset --- docs/datamodel.md | 2 +- tests/io/test_io.py | 50 ++++++++++++++++++------------------- tests/test_util.py | 27 ++++++++++++++++++++ xradar/accessors.py | 2 +- xradar/model.py | 14 +++++------ xradar/util.py | 60 +++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 119 insertions(+), 36 deletions(-) diff --git a/docs/datamodel.md b/docs/datamodel.md index 8a76edbf..4f319afc 100644 --- a/docs/datamodel.md +++ b/docs/datamodel.md @@ -62,7 +62,7 @@ Internal Representation: {py:class}`xarray:xarray.DataArray` - DBZH, radar_equivalent_reflectivity_factor_h - DBZV, radar_equivalent_reflectivity_factor_v -- and many more, see {class}`xradar.model.sweep_dataset_vars` +- and many more, see {class}`xradar.model.sweep_observed_vars` Internal Representation: {py:class}`xarray:xarray.DataArray` diff --git a/tests/io/test_io.py b/tests/io/test_io.py index 1b7c871c..9fc4619d 100644 --- a/tests/io/test_io.py +++ b/tests/io/test_io.py @@ -24,9 +24,9 @@ open_rainbow_datatree, ) from xradar.model import ( - non_standard_sweep_dataset_vars, + non_standard_sweep_observed_vars, required_sweep_metadata_vars, - sweep_dataset_vars, + sweep_observed_vars, ) @@ -69,7 +69,7 @@ def test_open_cfradial1_datatree(cfradial1_file): assert i == int(grp[7:]) assert dict(ds.sizes) == {"time": azimuths[i], "range": ranges[i]} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} @@ -89,7 +89,7 @@ def test_open_cfradial1_dataset(cfradial1_file): with 
xr.open_dataset(cfradial1_file, group="sweep_0", engine="cfradial1") as ds: assert list(ds.dims) == ["azimuth", "range"] assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"DBZ", "VR"} assert ds.sweep_number == 0 @@ -97,7 +97,7 @@ def test_open_cfradial1_dataset(cfradial1_file): with xr.open_dataset(cfradial1_file, group="sweep_8", engine="cfradial1") as ds: assert list(ds.dims) == ["azimuth", "range"] assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"DBZ", "VR"} assert ds.sweep_number == 8 @@ -168,7 +168,7 @@ def test_open_odim_datatree(odim_file): ds = dtree[grp].ds assert dict(ds.sizes) == {"azimuth": azimuths[i], "range": ranges[i]} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} @@ -200,7 +200,7 @@ def test_open_odim_dataset(odim_file, first_dim, fix_second_angle): dim0 = "time" if first_dim == "time" else "azimuth" assert dict(ds.sizes) == {dim0: 360, "range": 1200} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"WRADH", "VRADH", "PHIDP", "DBZH", "RHOHV", "KDP", "TH", "ZDR"} assert ds.sweep_number == 0 @@ -214,7 +214,7 @@ def test_open_odim_dataset(odim_file, first_dim, fix_second_angle): ) as ds: assert dict(ds.sizes) == {dim0: 360, "range": 280} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"VRADH", "KDP", "WRADH", "TH", "RHOHV", "PHIDP", "ZDR", "DBZH"} assert ds.sweep_number == 11 @@ -323,7 +323,7 @@ def 
test_open_gamic_datatree(gamic_file): ds = dtree[grp].ds assert dict(ds.sizes) == {"azimuth": azimuths[i], "range": ranges[i]} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} @@ -355,7 +355,7 @@ def test_open_gamic_dataset(gamic_file, first_dim, fix_second_angle): dim0 = "time" if first_dim == "time" else "azimuth" assert dict(ds.sizes) == {dim0: 361, "range": 360} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == { "WRADH", "WRADV", @@ -382,7 +382,7 @@ def test_open_gamic_dataset(gamic_file, first_dim, fix_second_angle): ) as ds: assert dict(ds.sizes) == {dim0: 360, "range": 1000} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == { "WRADH", "WRADV", @@ -437,7 +437,7 @@ def test_open_furuno_scn_dataset(furuno_scn_file): with xr.open_dataset(furuno_scn_file, first_dim="time", engine="furuno") as ds: assert dict(ds.sizes) == {"time": 1376, "range": 602} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"KDP", "VRADH", "ZDR", "DBZH", "WRADH", "RHOHV", "PHIDP"} for key, value in ds.data_vars.items(): if key in [ @@ -473,7 +473,7 @@ def test_open_furuno_scnx_dataset(furuno_scnx_file): with xr.open_dataset(furuno_scnx_file, first_dim="time", engine="furuno") as ds: assert dict(ds.sizes) == {"time": 722, "range": 936} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"KDP", "VRADH", "ZDR", "DBZH", "WRADH", "RHOHV", "PHIDP"} for key, value in ds.data_vars.items(): 
@@ -549,7 +549,7 @@ def test_open_rainbow_datatree(rainbow_file): ds = dtree[grp].ds assert dict(ds.sizes) == {"azimuth": azimuths[i], "range": ranges[i]} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} @@ -572,7 +572,7 @@ def test_open_rainbow_dataset(rainbow_file): with xr.open_dataset(rainbow_file, group="sweep_0", engine="rainbow") as ds: assert dict(ds.sizes) == {"azimuth": 361, "range": 400} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == { "DBZH", } @@ -582,7 +582,7 @@ def test_open_rainbow_dataset(rainbow_file): with xr.open_dataset(rainbow_file, group="sweep_13", engine="rainbow") as ds: assert dict(ds.sizes) == {"azimuth": 361, "range": 400} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == { "DBZH", } @@ -645,7 +645,7 @@ def test_open_iris_datatree(iris0_file): ds = dtree[grp].ds assert dict(ds.sizes) == {"azimuth": azimuths[i], "range": ranges[i]} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} @@ -669,7 +669,7 @@ def test_open_iris0_dataset(iris0_file): with xr.open_dataset(iris0_file, group="sweep_0", engine="iris") as ds: assert dict(ds.sizes) == {"azimuth": 360, "range": 664} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == { "DBZH", "VRADH", @@ -684,7 +684,7 @@ def test_open_iris0_dataset(iris0_file): with 
xr.open_dataset(iris0_file, group="sweep_9", engine="iris") as ds: assert dict(ds.sizes) == {"azimuth": 360, "range": 664} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == { "DBZH", "VRADH", @@ -711,7 +711,7 @@ def test_open_iris1_dataset(iris1_file): with xr.open_dataset(iris1_file, group="sweep_0", engine="iris") as ds: assert dict(ds.sizes) == {"azimuth": 359, "range": 833} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == { "DBZH", "KDP", @@ -824,7 +824,7 @@ def test_open_datamet_dataset(datamet_file): ) as ds: assert dict(ds.sizes) == {"azimuth": 360, "range": 493} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"DBTH", "DBZH", "KDP", "PHIDP", "RHOHV", "VRADH", "WRADH", "ZDR"} assert ds.sweep_number == 0 @@ -836,7 +836,7 @@ def test_open_datamet_dataset(datamet_file): ) as ds: assert dict(ds.sizes) == {"azimuth": 360, "range": 1332} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == {"DBTH", "DBZH", "KDP", "PHIDP", "RHOHV", "VRADH", "WRADH", "ZDR"} assert ds.sweep_number == 10 @@ -882,7 +882,7 @@ def test_open_datamet_datatree(datamet_file): ds = dtree[grp].ds assert dict(ds.sizes) == {"azimuth": azimuths[i], "range": ranges[i]} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} @@ -975,7 +975,7 @@ def test_cfradial_n_points_file(cfradial1n_file): "frequency": 1, } assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + 
sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} @@ -1085,7 +1085,7 @@ def test_open_nexradlevel2_datatree(nexradlevel2_files): ds = dtree[grp].ds assert dict(ds.sizes) == {"azimuth": azimuths[i], "range": ranges[i]} assert set(ds.data_vars) & ( - sweep_dataset_vars | non_standard_sweep_dataset_vars + sweep_observed_vars | non_standard_sweep_observed_vars ) == set(moments[i]) assert set(ds.data_vars) & (required_sweep_metadata_vars) == set( required_sweep_metadata_vars ^ {"azimuth", "elevation"} diff --git a/tests/test_util.py b/tests/test_util.py index ed0a80e6..770e8dd6 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -483,3 +483,30 @@ def test_get_subgroup(): np.testing.assert_almost_equal(subgroup.longitude.values.item(), 151.20899963378906) np.testing.assert_almost_equal(subgroup.latitude.values.item(), -33.700801849365234) assert isinstance(subgroup.altitude.values.item(), float) + + +def test_get_observed_variables(): + + filename = DATASETS.fetch("cor-main131125105503.RAW2049") + sweep = xr.open_dataset(filename, group="sweep_7", engine="iris") + dataset_vars = util.get_dataset_variables(sweep) + + assert dataset_vars == ["DBZH", "VRADH", "ZDR", "KDP", "PHIDP", "RHOHV"] + + +def test_select_observed_variables(): + + filename = DATASETS.fetch("DWD-Vol-2_99999_20180601054047_00.h5") + sweep = xr.open_dataset(filename, group="sweep_7", engine="gamic") + sweep = sweep.assign(quality1=sweep["DBZH"] < 60) + sweep["DBZH"].attrs["ancillary_variables"] = ["quality1"] + sweep = sweep.assign(quality2=sweep["RHOHV"] < 0.5) + sweep["RHOHV"].attrs["ancillary_variables"] = ["quality2"] + + select_vars = ["DBZH", "PHIDP"] + sweep = util.select_dataset_variables(sweep, variables=select_vars) + metadata = list(model.required_sweep_metadata_vars) + metadata.remove("elevation") + metadata.remove("azimuth") + + assert 
set(sweep.data_vars) == set(select_vars + ["quality1"] + metadata) diff --git a/xradar/accessors.py b/xradar/accessors.py index 2e08eee3..c8a18140 100644 --- a/xradar/accessors.py +++ b/xradar/accessors.py @@ -120,7 +120,7 @@ def get_crs(self): @xr.register_dataset_accessor("xradar") class XradarDataSetAccessor(XradarAccessor): - """Adds a number of xradar specific methods to xarray.DataArray objects.""" + """Adds a number of xradar specific methods to xarray.Dataset objects.""" def georeference( self, earth_radius=None, effective_radius_fraction=None diff --git a/xradar/model.py b/xradar/model.py index 47a0c2fb..5857381f 100644 --- a/xradar/model.py +++ b/xradar/model.py @@ -50,8 +50,8 @@ "sweep_coordinate_vars", "required_sweep_metadata_vars", "optional_sweep_metadata_vars", - "sweep_dataset_vars", - "non_standard_sweep_dataset_vars", + "sweep_observed_vars", + "non_standard_sweep_observed_vars", "determine_cfradial2_sweep_variables", "conform_cfradial2_sweep_group", ] @@ -172,8 +172,8 @@ "n_samples", } -#: sweep dataset variable names -sweep_dataset_vars = { +#: sweep observed variable names +sweep_observed_vars = { "DBZH", "DBZV", "ZH", @@ -213,8 +213,8 @@ "REC", } -#: non-standard sweep dataset variable names -non_standard_sweep_dataset_vars = { +#: non-standard sweep observed variable names +non_standard_sweep_observed_vars = { "DBZ", "VEL", "VR", @@ -373,7 +373,7 @@ #: required moment attributes moment_attrs = {"standard_name", "long_name", "units"} -# todo: align this with sweep_dataset_vars +# todo: align this with sweep_observed_vars #: CfRadial 2.1 / FM301 / ODIM_H5 mapping sweep_vars_mapping = { "DBZH": { diff --git a/xradar/util.py b/xradar/util.py index f75a24f8..d173c826 100644 --- a/xradar/util.py +++ b/xradar/util.py @@ -26,6 +26,8 @@ "apply_to_sweeps", "apply_to_volume", "map_over_sweeps", + "get_dataset_variables", + "select_dataset_variables", ] __doc__ = __doc__.format("\n ".join(__all__)) @@ -41,6 +43,8 @@ import xarray as xr from scipy import 
interpolate +from .model import required_sweep_metadata_vars, sweep_observed_vars + def has_import(pkg_name): return importlib.util.find_spec(pkg_name) @@ -266,7 +270,7 @@ def extract_angle_parameters(ds): angles_are_unique=angles_are_unique, times_are_unique=times_are_unique ) - # 4. get index and value of first measured angle, aka a1gate + # 4. get index and value of first observed angle, aka a1gate a1gate_idx = ds.time.argmin(first_angle).values a1gate_val = ds.time.idxmin(first_angle).values angle_dict.update(a1gate_idx=a1gate_idx, a1gate_val=a1gate_val) @@ -421,7 +425,7 @@ def ipol_time(ds, *, a1gate_idx=None, direction=None, **kwargs): Keyword Arguments ----------------- a1gate_idx : int | None - First measured gate. 0 assumed, if None. + First observed gate. 0 assumed, if None. direction : int | None 1: CW, -1: CCW, Clockwise assumed, if None. @@ -648,3 +652,55 @@ def _map_over_sweeps(*args, **kwargs): return xr.map_over_datasets(functools.partial(_func, **kwargs), *args) return _map_over_sweeps + + +def get_observed_variables(sweep): + """Get the list of observed variables in the radar sweep. + + Parameters + ---------- + sweep : xarray.Dataset + radar sweep dataset following WMO standard + + Returns + ------- + dataset_vars : list of strings + standard dataset variables in the radar sweep + """ + + dataset_vars = [x for x in sweep.data_vars if x in sweep_observed_vars] + + return dataset_vars + + +def select_observed_variables(sweep, variables, ancillary=True): + """Select observed variables in sweep. 
+ + Parameters + ---------- + sweep : xarray.Dataset + radar sweep dataset following WMO standard + + variables : list of strings + dataset variables to be selected (extended in place with ancillary variable names if ancillary is True) + + Keyword Arguments + ----------------- + ancillary : boolean + keep ancillary variables associated to dataset variables + + Returns + ------- + sweep_out : xarray.Dataset + sweep with selected dataset variables and standard metadata variables + """ + + if ancillary: + for v in variables: + if "ancillary_variables" in sweep[v].attrs: + variables.extend(sweep[v].attrs["ancillary_variables"]) + + keep_vars = set(variables + list(required_sweep_metadata_vars)) + sweep_out = sweep[keep_vars] + + return sweep_out