diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 29fd5c3c..ad2a7469 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,7 +7,7 @@ v0.9.0 - unreleased New Features ^^^^^^^^^^^^ -- Refactored statistical functionality for linear regression: +- Extracted statistical functionality for linear regression: - Create :py:class:`mesmer.stats.linear_regression.LinearRegression` which encapsulates ``fit``, ``predict``, etc. methods around linear regression (`#134 `_). @@ -22,7 +22,7 @@ New Features (`#221 `_). By `Mathias Hauser `_. -- Refactored statistical functionality for auto regression: +- Extracted statistical functionality for auto regression: - Add ``mesmer.stats.auto_regression._fit_auto_regression_xr``: xarray wrapper to fit an auto regression model (`#139 `_). By `Mathias Hauser `_. @@ -33,7 +33,7 @@ New Features (`#176 `_). By `Mathias Hauser `_. -- Refactored functions dealing with the spatial covariance and its localization: +- Extracted functions dealing with the spatial covariance and its localization: - Add xarray wrappers :py:func:`mesmer.stats.localized_covariance.adjust_covariance_ar1` and :py:func:`mesmer.stats.localized_covariance.find_localized_empirical_covariance` (`#191 `__). @@ -60,8 +60,9 @@ New Features - Added functions to mask the ocean and Antarctica (`#219 `_). By `Mathias Hauser `_. - - Added functions to calculate the weighted global mean (`#220 - `_). By `Mathias Hauser + - Added functions to calculate the weighted global mean ( + `#220 `_ + `#287 `_). By `Mathias Hauser `_. - Added functions to wrap arrays to [-180, 180) and [0, 360), respectively (`#270 `_ and `#273 diff --git a/docs/source/api.rst b/docs/source/api.rst index f7477d6b..fdea85c3 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -61,6 +61,9 @@ Geo-spatial Data handling ============= +Grid manipulation +----------------- + .. 
autosummary:: :toctree: generated/ @@ -70,13 +73,22 @@ Data handling ~core.grid.unstack_lat_lon_and_align ~core.grid.unstack_lat_lon ~core.grid.align_to_coords + +Masking regions +--------------- + ~core.mask.mask_ocean_fraction ~core.mask.mask_ocean ~core.mask.mask_antarctica - ~core.globmean.lat_weights - ~core.globmean.weighted_mean ~core.regionmaskcompat.mask_3D_frac_approx +Weighted operations: calculate global mean +------------------------------------------ + + ~core.weighted.global_mean + ~core.weighted.lat_weights + ~core.weighted.weighted_mean + Legacy functions ================ diff --git a/mesmer/__init__.py b/mesmer/__init__.py index a8513edf..df54b6b6 100644 --- a/mesmer/__init__.py +++ b/mesmer/__init__.py @@ -8,7 +8,7 @@ """ from . import calibrate_mesmer, core, create_emulations, io, utils -from .core import globmean, grid, mask +from .core import grid, mask, weighted __all__ = [ "calibrate_mesmer", @@ -18,7 +18,7 @@ "io", "mask", "utils", - "globmean", + "weighted", ] try: diff --git a/mesmer/core/globmean.py b/mesmer/core/weighted.py similarity index 59% rename from mesmer/core/globmean.py rename to mesmer/core/weighted.py index 2fa05d9b..24f41369 100644 --- a/mesmer/core/globmean.py +++ b/mesmer/core/weighted.py @@ -11,7 +11,7 @@ def _weighted_if_dim(obj, weights, dims): # https://github.com/pydata/xarray/issues/7027 def _weighted_mean(da): - if all(dim in da.dims for dim in dims): + if dims is None or all(dim in da.dims for dim in dims): return da.weighted(weights).mean(dims, keep_attrs=True) return da @@ -34,8 +34,8 @@ def lat_weights(lat_coords): return weights -def weighted_mean(data, weights, x_dim="lon", y_dim="lat"): - """Calculate the area-weighted global mean +def weighted_mean(data, weights, dims=None): + """weighted mean - convenience function which ignores data_vars missing dims Parameters ---------- @@ -44,10 +44,8 @@ def weighted_mean(data, weights, x_dim="lon", y_dim="lat"): weights : xr.DataArray DataArray containing the area of 
each grid cell (or a measure proportional to the grid cell area). - x_dim : str, default: "lon" - Name of the x-dimension. - y_dim : str, default: "lat" - Name of the y-dimension. + dims : Hashable or Iterable of Hashable, optional + Dimension(s) over which to apply the weighted ``mean``. Returns ------- @@ -56,10 +54,42 @@ def weighted_mean(data, weights, x_dim="lon", y_dim="lat"): """ + if isinstance(dims, str): + dims = [dims] + # ensure grids are equal try: xr.align(data, weights, join="exact") except ValueError: raise ValueError("`data` and `weights` don't exactly align.") - return _weighted_if_dim(data, weights, [x_dim, y_dim]) + return _weighted_if_dim(data, weights, dims) + + +def global_mean(data, weights=None, x_dim="lon", y_dim="lat"): + """calculate global weighted mean + + Parameters + ---------- + data : xr.Dataset | xr.DataArray + Array to reduce to the global mean. + weights : xr.DataArray, optional + DataArray containing the area of each grid cell (or a measure proportional to + the grid cell area). If not given will compute it from the cosine of the + latitudes. + x_dim : str, default: "lon" + Name of the x-dimension. + y_dim : str, default: "lat" + Name of the y-dimension. + + Returns + ------- + obj : xr.Dataset | xr.DataArray + Weighted mean of ``data`` over ``x_dim`` and ``y_dim``. 
+ + """ + + if weights is None: + weights = lat_weights(data[y_dim]) + + return weighted_mean(data, weights, [x_dim, y_dim]) diff --git a/tests/unit/test_globmean.py b/tests/unit/test_globmean.py deleted file mode 100644 index 05bffe45..00000000 --- a/tests/unit/test_globmean.py +++ /dev/null @@ -1,107 +0,0 @@ -import numpy as np -import pytest -import xarray as xr - -import mesmer - - -def data_lon_lat(as_dataset, x_dim="lon", y_dim="lat"): - - lon = np.arange(0.5, 360, 2) - lat = np.arange(90, -91, -2) - time = np.arange(3) - - data = np.random.randn(*time.shape, *lat.shape, *lon.shape) - - da = xr.DataArray( - data, - dims=("time", y_dim, x_dim), - coords={"time": time, x_dim: lon, y_dim: lat}, - attrs={"key": "da_attrs"}, - ) - - ds = xr.Dataset(data_vars={"data": da, "scalar": 1}, attrs={"key": "ds_attrs"}) - - if as_dataset: - return ds - return ds.data - - -def test_lat_weights_scalar(): - - np.testing.assert_allclose(mesmer.globmean.lat_weights(90), 0.0, atol=1e-7) - np.testing.assert_allclose(mesmer.globmean.lat_weights(45), np.sqrt(2) / 2) - np.testing.assert_allclose(mesmer.globmean.lat_weights(0), 1.0, atol=1e-7) - np.testing.assert_allclose(mesmer.globmean.lat_weights(-45), np.sqrt(2) / 2) - np.testing.assert_allclose(mesmer.globmean.lat_weights(-90), 0.0, atol=1e-7) - - -def test_lat_weights(): - - lat_coords = np.arange(90, -91, -1) - lat = xr.DataArray(lat_coords, dims=("lat"), coords={"lat": lat_coords}, name="lat") - - expected = np.cos(np.deg2rad(lat_coords)) - expected = xr.DataArray(expected, dims=("lat"), coords={"lat": lat}, name="lat") - - result = mesmer.globmean.lat_weights(lat) - - xr.testing.assert_equal(result, expected) - - -def test_lat_weights_2D_warn_2D(): - - lat = np.arange(10).reshape(2, 5) - - with pytest.warns(UserWarning, match="non-regular grids"): - mesmer.globmean.lat_weights(lat) - - -@pytest.mark.parametrize("lat", [-91, 90.1]) -def test_lat_weights_2D_error_90(lat): - - with pytest.raises(ValueError, match="`lat_coords` 
must be between -90 and 90"): - mesmer.globmean.lat_weights(lat) - - -def _test_weighted_mean(as_dataset, **kwargs): - # not checking the actual mask - - data = data_lon_lat(as_dataset, **kwargs) - - y_coords = kwargs.get("y_dim", "lat") - weights = mesmer.globmean.lat_weights(data[y_coords]) - - result = mesmer.globmean.weighted_mean(data, weights=weights, **kwargs) - - if as_dataset: - # ensure scalar is not broadcast - assert result.scalar.ndim == 0 - assert result.attrs == {"key": "ds_attrs"} - - result_da = result.data - else: - result_da = result - - assert result_da.ndim == 1 - assert result_da.notnull().all() - - assert result_da.attrs == {"key": "da_attrs"} - - -@pytest.mark.parametrize("as_dataset", [True, False]) -def test_calc_weighted_mean_default(as_dataset): - - _test_weighted_mean(as_dataset) - - -@pytest.mark.parametrize("as_dataset", (True, False)) -@pytest.mark.parametrize("x_dim", ("x", "lon")) -@pytest.mark.parametrize("y_dim", ("y", "lat")) -def test_calc_weighted_mean(as_dataset, x_dim, y_dim): - - _test_weighted_mean( - as_dataset, - x_dim=x_dim, - y_dim=y_dim, - ) diff --git a/tests/unit/test_weighted.py b/tests/unit/test_weighted.py new file mode 100644 index 00000000..fec71085 --- /dev/null +++ b/tests/unit/test_weighted.py @@ -0,0 +1,177 @@ +import numpy as np +import pytest +import xarray as xr + +import mesmer + + +def data_lon_lat(as_dataset, x_dim="lon", y_dim="lat"): + + lon = np.arange(0.5, 360, 2) + lat = np.arange(90, -91, -2) + time = np.arange(3) + + data = np.random.randn(*time.shape, *lat.shape, *lon.shape) + + da = xr.DataArray( + data, + dims=("time", y_dim, x_dim), + coords={"time": time, x_dim: lon, y_dim: lat}, + attrs={"key": "da_attrs"}, + ) + + ds = xr.Dataset(data_vars={"data": da, "scalar": 1}, attrs={"key": "ds_attrs"}) + + if as_dataset: + return ds + return ds.data + + +def test_lat_weights_scalar(): + + np.testing.assert_allclose(mesmer.weighted.lat_weights(90), 0.0, atol=1e-7) + 
np.testing.assert_allclose(mesmer.weighted.lat_weights(45), np.sqrt(2) / 2) + np.testing.assert_allclose(mesmer.weighted.lat_weights(0), 1.0, atol=1e-7) + np.testing.assert_allclose(mesmer.weighted.lat_weights(-45), np.sqrt(2) / 2) + np.testing.assert_allclose(mesmer.weighted.lat_weights(-90), 0.0, atol=1e-7) + + +def test_lat_weights(): + + attrs = {"key": "value"} + lat_coords = np.arange(90, -91, -1) + lat = xr.DataArray( + lat_coords, dims=("lat"), coords={"lat": lat_coords}, name="lat", attrs=attrs + ) + + expected = np.cos(np.deg2rad(lat_coords)) + expected = xr.DataArray( + expected, dims=("lat"), coords={"lat": lat_coords}, name="lat", attrs=attrs + ) + + result = mesmer.weighted.lat_weights(lat) + + xr.testing.assert_identical(result, expected) + + +def test_lat_weights_2D_warn_2D(): + + lat = np.arange(10).reshape(2, 5) + + with pytest.warns(UserWarning, match="non-regular grids"): + mesmer.weighted.lat_weights(lat) + + +@pytest.mark.parametrize("lat", [-91, 90.1]) +def test_lat_weights_2D_error_90(lat): + + with pytest.raises(ValueError, match="`lat_coords` must be between -90 and 90"): + mesmer.weighted.lat_weights(lat) + + +@pytest.mark.parametrize("as_dataset", [True, False]) +def test_weighted_mean_errors_wrong_weights(as_dataset): + + data = data_lon_lat(as_dataset) + weights = mesmer.weighted.lat_weights(data["lat"]) + weights = weights.isel(lat=slice(None, weights.size - 3)) + + with pytest.raises(ValueError, match="`data` and `weights` don't exactly align."): + mesmer.weighted.weighted_mean(data, weights=weights, dims=("lat", "lon")) + + with pytest.raises(ValueError, match="`data` and `weights` don't exactly align."): + mesmer.weighted.weighted_mean(data, weights=weights, dims=("lat", "lon")) + + data.weighted(weights).mean(("lat", "lon")) + data.weighted(weights).mean(("lat", "lon")) + + +def _test_weighted_mean(as_dataset, **kwargs): + # not checking the actual values + + data = data_lon_lat(as_dataset, **kwargs) + + y_dim = 
kwargs.get("y_dim", "lat") + weights = mesmer.weighted.lat_weights(data[y_dim]) + + dims = list(kwargs.values()) if kwargs else ("lat", "lon") + + result = mesmer.weighted.weighted_mean(data, weights=weights, dims=dims) + + result = data.weighted(weights).mean(dims, keep_attrs=True) + + if as_dataset: + # ensure scalar is not broadcast + assert result.scalar.ndim == 0 + assert result.attrs == {"key": "ds_attrs"} + + result_da = result.data + else: + result_da = result + + assert result_da.ndim == 1 + assert result_da.notnull().all() + + assert result_da.attrs == {"key": "da_attrs"} + + +@pytest.mark.parametrize("as_dataset", [True, False]) +def test_calc_weighted_mean_default(as_dataset): + + _test_weighted_mean(as_dataset) + + +@pytest.mark.parametrize("as_dataset", (True, False)) +@pytest.mark.parametrize("x_dim", ("x", "lon")) +@pytest.mark.parametrize("y_dim", ("y", "lat")) +def test_calc_weighted_mean(as_dataset, x_dim, y_dim): + + _test_weighted_mean( + as_dataset, + x_dim=x_dim, + y_dim=y_dim, + ) + + +@pytest.mark.parametrize("as_dataset", (True, False)) +def test_weighted_no_scalar_expand(as_dataset): + + data = data_lon_lat(as_dataset) + weights = xr.ones_like(data.lat) + + result = mesmer.weighted.weighted_mean(data, weights=weights, dims="lon") + + expected = data.mean("lon") + + xr.testing.assert_allclose(result, expected) + + +@pytest.mark.parametrize("as_dataset", (True, False)) +@pytest.mark.parametrize("x_dim", ("x", "lon")) +@pytest.mark.parametrize("y_dim", ("y", "lat")) +def test_global_mean_no_weights_passed(as_dataset, x_dim, y_dim): + + data = data_lon_lat(as_dataset, y_dim=y_dim, x_dim=x_dim) + + weights = mesmer.weighted.lat_weights(data[y_dim]) + + result = mesmer.weighted.global_mean(data, x_dim=x_dim, y_dim=y_dim) + + dims = (x_dim, y_dim) + expected = mesmer.weighted.weighted_mean(data, weights=weights, dims=dims) + + xr.testing.assert_equal(result, expected) + + +@pytest.mark.parametrize("as_dataset", (True, False)) +def 
test_global_mean_weights_passed(as_dataset): + + data = data_lon_lat(as_dataset) + + weights = xr.ones_like(data["lat"]) + + result = mesmer.weighted.global_mean(data, weights=weights) + + expected = data.mean(("lat", "lon")) + + xr.testing.assert_allclose(result, expected)