From c31044f059a3ebe49889fcf07eba00bc92df6c39 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 27 Sep 2023 11:25:49 +0200 Subject: [PATCH] auto_regression: return covariance (#309) * auto_regression: return covariance * add reference to #307 * CHANGELOG --- CHANGELOG.rst | 3 +++ mesmer/calibrate_mesmer/train_gv.py | 3 +++ mesmer/calibrate_mesmer/train_lv.py | 5 ++++- mesmer/stats/auto_regression.py | 11 ++++++----- tests/unit/test_auto_regression.py | 16 ++++++---------- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 138fd547..f8bfa6fb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -26,6 +26,9 @@ New Features - Add ``mesmer.stats.auto_regression._fit_auto_regression_xr``: xarray wrapper to fit an auto regression model (`#139 `_). By `Mathias Hauser`_. + - Have ``mesmer.stats.auto_regression._fit_auto_regression_xr`` return the covariance instead + of the standard deviation (`#306 `_). + By `Mathias Hauser`_. - Add ``mesmer.stats.auto_regression._draw_auto_regression_correlated_np``: to draw samples of an auto regression model (`#161 `_). By `Mathias Hauser`_. diff --git a/mesmer/calibrate_mesmer/train_gv.py b/mesmer/calibrate_mesmer/train_gv.py index e91ac14e..5902c9f1 100644 --- a/mesmer/calibrate_mesmer/train_gv.py +++ b/mesmer/calibrate_mesmer/train_gv.py @@ -200,6 +200,9 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit): data = xr.DataArray(data, dims=("run", "time")) params = _fit_auto_regression_xr(data, dim="time", lags=AR_order_sel) + # BUG/ TODO: we wrongfully average over the standard_deviation + # see https://github.com/MESMER-group/mesmer/issues/307 + params["standard_deviation"] = np.sqrt(params.covariance) params = params.mean("run") params_scen.append(params) diff --git a/mesmer/calibrate_mesmer/train_lv.py b/mesmer/calibrate_mesmer/train_lv.py index 927ac498..76a9015f 100644 --- a/mesmer/calibrate_mesmer/train_lv.py +++ b/mesmer/calibrate_mesmer/train_lv.py @@ -6,7 +6,7 @@ Functions to train local variability module of MESMER. """ - +import numpy as np import xarray as xr from mesmer.io.save_mesmer_bundle import save_mesmer_data @@ -237,6 +237,9 @@ def train_lv_AR1_sci(params_lv, targs, y, wgt_scen_eq, aux, cfg): data = xr.DataArray(data, dims=("run", "time", "cell")) params = _fit_auto_regression_xr(data, dim="time", lags=1) + # BUG/ TODO: we wrongfully average over the standard_deviation + # see https://github.com/MESMER-group/mesmer/issues/307 + params["standard_deviation"] = np.sqrt(params.covariance) params = params.mean("run") params_scen.append(params) diff --git a/mesmer/stats/auto_regression.py b/mesmer/stats/auto_regression.py index 2d935702..d2ba2c9d 100644 --- a/mesmer/stats/auto_regression.py +++ b/mesmer/stats/auto_regression.py @@ -177,7 +177,8 @@ def _fit_auto_regression_xr(data, dim, lags): if not isinstance(data, xr.DataArray): raise TypeError(f"Expected a `xr.DataArray`, got {type(data)}") - intercept, coeffs, std = xr.apply_ufunc( + # NOTE: this is slowish, see https://github.com/MESMER-group/mesmer/pull/290 + intercept, coeffs, covariance = xr.apply_ufunc( _fit_auto_regression_np, data, input_core_dims=[[dim]], @@ -193,7 +194,7 @@ def _fit_auto_regression_xr(data, dim, lags): data_vars = { "intercept": intercept, "coeffs": coeffs, - "standard_deviation": std, + "covariance": covariance, "lags": lags, } @@ -229,7 +230,7 @@ def _fit_auto_regression_np(data, lags): intercept = AR_result.params[0] coeffs = AR_result.params[1:] - # standard deviation of the residuals - std = np.sqrt(AR_result.sigma2) + # covariance of the residuals + covariance = AR_result.sigma2 - return intercept, coeffs, std + return intercept, coeffs, covariance diff --git a/tests/unit/test_auto_regression.py b/tests/unit/test_auto_regression.py index a9e17cdc..461d4eb0 100644 --- a/tests/unit/test_auto_regression.py +++ b/tests/unit/test_auto_regression.py @@ -198,7 +198,7 @@ def test_fit_auto_regression_xr_1D_values(): { "intercept": 1.04728995, "coeffs": ("lags", [0.99682459]), - "standard_deviation": 1.02655342, + "covariance": 1.05381192, "lags": [1], } ) @@ -219,7 +219,7 @@ def test_fit_auto_regression_xr_1D_values_lags(): { "intercept": 2.08295035, "coeffs": ("lags", [0.99318256]), - "standard_deviation": 1.08955374, + "covariance": 1.18712735, "lags": [2], } ) @@ -238,16 +238,14 @@ def test_fit_auto_regression_xr_1D(lags): _check_dataset_form( res, "_fit_auto_regression_result", - required_vars=["intercept", "coeffs", "standard_deviation"], + required_vars=["intercept", "coeffs", "covariance"], ) _check_dataarray_form(res.intercept, "intercept", ndim=0, shape=()) _check_dataarray_form( res.coeffs, "coeffs", ndim=1, required_dims={"lags"}, shape=(len(lags),) ) - _check_dataarray_form( - res.standard_deviation, "standard_deviation", ndim=0, shape=() - ) + _check_dataarray_form(res.covariance, "covariance", ndim=0, shape=()) expected = xr.DataArray(lags, coords={"lags": lags}) @@ -265,7 +263,7 @@ def test_fit_auto_regression_xr_2D(lags): _check_dataset_form( res, "_fit_auto_regression_result", - required_vars=["intercept", "coeffs", "standard_deviation"], + required_vars=["intercept", "coeffs", "covariance"], ) _check_dataarray_form(res.intercept, "intercept", ndim=1, shape=(n_cells,)) @@ -276,9 +274,7 @@ def test_fit_auto_regression_xr_2D(lags): required_dims={"cells", "lags"}, shape=(n_cells, lags), ) - _check_dataarray_form( - res.standard_deviation, "standard_deviation", ndim=1, shape=(n_cells,) - ) + _check_dataarray_form(res.covariance, "covariance", ndim=1, shape=(n_cells,)) @pytest.mark.parametrize("lags", [1, 2])