Fix wrong averaging of standard deviations (#499)
* implement right averaging of variances

* adjust tests 

* linting

* changelog

* update integration test files

* documentation
---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
3 people authored Aug 22, 2024
1 parent a4c293a commit df670b4
Showing 17 changed files with 36 additions and 35 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
@@ -33,6 +33,9 @@ Deprecations

Bug fixes
^^^^^^^^^
- When averaging the AR parameters of global variability over several ensemble members and scenarios, the variances of the
  innovations are now averaged instead of the standard deviations (`#499 <https://github.com/MESMER-group/mesmer/pull/499>`_).
  By `Victoria Bauer`_.

Documentation
^^^^^^^^^^^^^
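For illustration (not part of this commit's diff), a minimal numeric sketch of why the pooling order matters: because the square root is concave, the mean of standard deviations is at most the square root of the mean variance, so averaging standard deviations directly underestimates the pooled innovation spread. The numbers below are made up.

import numpy as np

# hypothetical innovation standard deviations from two ensemble members
stds = np.array([0.1, 0.5])

mean_of_stds = stds.mean()                  # 0.30, the old (incorrect) pooling
sqrt_mean_var = np.sqrt((stds**2).mean())   # ~0.36, average the variances, then take the root

print(mean_of_stds, sqrt_mean_var)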
2 changes: 1 addition & 1 deletion mesmer/calibrate_mesmer/train_gv.py
@@ -183,7 +183,7 @@ def train_gv_AR(params_gv, gv, max_lag, sel_crit):
params_gv["AR_order_sel"] = AR_order.item()
params_gv["AR_int"] = np.float64(params.intercept.values)
params_gv["AR_coefs"] = params.coeffs.values.squeeze()
params_gv["AR_std_innovs"] = np.float64(params.standard_deviation.values)
params_gv["AR_var_innovs"] = np.float64(params.variance.values)

# check if fitted AR process is stationary
# (highly unlikely this test will ever fail but better safe than sorry)
4 changes: 2 additions & 2 deletions mesmer/calibrate_mesmer/train_lv.py
@@ -215,7 +215,7 @@ def train_lv_AR1_sci(params_lv, targs, y, wgt_scen_eq, aux, cfg):
# AR(1)
params_lv["AR1_int"] = {}
params_lv["AR1_coef"] = {}
params_lv["AR1_std_innovs"] = {}
params_lv["AR1_var_innovs"] = {}
params_lv["L"] = {} # localisation radius
# empirical cov matrix of the local variability trained on here
params_lv["ecov"] = {}
@@ -237,7 +237,7 @@ def train_lv_AR1_sci(params_lv, targs, y, wgt_scen_eq, aux, cfg):

params_lv["AR1_int"][targ_name] = params.intercept.values
params_lv["AR1_coef"][targ_name] = params.coeffs.values.squeeze()
params_lv["AR1_std_innovs"][targ_name] = params.standard_deviation.values
params_lv["AR1_var_innovs"][targ_name] = params.variance.values

# determine localization radius, empirical cov matrix, and localized ecov matrix

6 changes: 3 additions & 3 deletions mesmer/create_emulations/create_emus_gv.py
@@ -126,7 +126,7 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed):
- ["AR_coefs"] (coefficients of the AR model for the lags which are contained in
the selected AR model, list of floats)
- ["AR_order_sel"] (selected AR order, int)
- ["AR_std_innovs"] (standard deviation of the innovations of the selected AR
- ["AR_var_innovs"] (standard deviation of the innovations of the selected AR
model, float)
nr_emus_v : int
@@ -155,7 +155,7 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed):
ar_int = params_gv["AR_int"]
ar_coefs = params_gv["AR_coefs"]
AR_order_sel = params_gv["AR_order_sel"]
AR_std_innovs = params_gv["AR_std_innovs"]
AR_var_innovs = params_gv["AR_var_innovs"]

# ensure ar_coefs are not a scalar
ar_coefs = np.atleast_1d(ar_coefs)
@@ -173,7 +173,7 @@ def create_emus_gv_AR(params_gv, nr_emus_v, nr_ts_emus_v, seed):
# the variables are 1D (except coeffs)
intercept = xr.DataArray(ar_int)
coeffs = xr.DataArray(ar_coefs, dims="lags")
variance = xr.DataArray(AR_std_innovs**2)
variance = xr.DataArray(AR_var_innovs)

ar_params = xr.Dataset(
{"intercept": intercept, "coeffs": coeffs, "variance": variance}
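For illustration (not part of this diff), a self-contained sketch of how an innovation variance enters an AR simulation, using made-up AR(2) parameters rather than mesmer's actual draw routine; the innovations are drawn with scale sqrt(variance).

import numpy as np

rng = np.random.default_rng(0)

intercept, coeffs, var_innovs = 0.0, np.array([0.5, 0.3]), 0.04  # hypothetical AR(2) parameters
n_ts = 250

# innovations are drawn from N(0, variance), i.e. with scale sqrt(var_innovs)
innovs = rng.normal(0.0, np.sqrt(var_innovs), size=n_ts)

ts = np.zeros(n_ts)
for t in range(len(coeffs), n_ts):
    ar_part = sum(c * ts[t - i - 1] for i, c in enumerate(coeffs))
    ts[t] = intercept + ar_part + innovs[t]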
24 changes: 13 additions & 11 deletions mesmer/stats/_auto_regression.py
@@ -65,11 +65,13 @@ def _fit_auto_regression_scen_ens(*objs, dim, ens_dim, lags):
Parameters
----------
*objs : iterable of DataArray
A list of ``xr.DataArray`` to estimate the auto regression over.
A list of ``xr.DataArray`` to estimate the auto regression over, each
representing one scenario, potentially with several ensemble members
along `ens_dim`.
dim : str
Dimension along which to fit the auto regression.
ens_dim : str
Dimension name of the ensemble members.
Dimension name of the ensemble members, or None if no ensemble is provided.
lags : int
The number of lags to include in the model.
@@ -81,17 +83,17 @@ def _fit_auto_regression_scen_ens(*objs, dim, ens_dim, lags):
Notes
-----
Calculates the mean auto regression, first over the ensemble members, then over all
scenarios.
If `ens_dim` is not `None`, calculates the mean auto regression first over all ensemble
members and then over scenarios. This weights the scenarios equally; consequently, ensemble
members are not weighted equally if the number of members differs between scenarios.
If no ensemble members are provided, the mean is calculated over scenarios only.
"""

ar_params_scen = list()
for obj in objs:
ar_params = fit_auto_regression(obj, dim=dim, lags=int(lags))

# BUG/ TODO: fix for v1, see https://github.com/MESMER-group/mesmer/issues/307
ar_params["standard_deviation"] = np.sqrt(ar_params.variance)

# TODO: think about weighting! see https://github.com/MESMER-group/mesmer/issues/307
if ens_dim in ar_params.dims:
ar_params = ar_params.mean(ens_dim)

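For illustration (not part of this diff), a toy xarray sketch of the two-step averaging described in the Notes above, using made-up variances: averaging members first and scenarios second gives both scenarios equal weight even though their member counts differ.

import numpy as np
import xarray as xr

# hypothetical per-member innovation variances for two scenarios
scen_a = xr.Dataset({"variance": ("ens", np.array([0.04, 0.06]))})        # member mean: 0.05
scen_b = xr.Dataset({"variance": ("ens", np.array([0.05, 0.07, 0.09]))})  # member mean: 0.07

per_scen = [ds.mean("ens") for ds in (scen_a, scen_b)]      # average over members first
pooled = xr.concat(per_scen, dim="scen").mean("scen")       # then over scenarios

print(float(pooled.variance))  # 0.06: each scenario contributes 1/2, regardless of member count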
@@ -257,7 +259,8 @@ def draw_auto_regression_uncorrelated(
n_time x n_coeffs x n_realisations.
"""

# NOTE: we use variance and not std since we use multivariate normal
# also to draw univariate realizations
# check the input
_check_dataset_form(
ar_params, "ar_params", required_vars=("intercept", "coeffs", "variance")
@@ -554,6 +557,7 @@ def fit_auto_regression(data, dim, lags):
if np.ndim(lags) == 0:
lags = np.arange(lags) + 1

# return intercept, coeffs, variance, lags, nobs
data_vars = {
"intercept": intercept,
"coeffs": coeffs,
@@ -597,9 +601,7 @@ def _fit_auto_regression_np(data, lags):
coeffs = AR_result.params[1:]

# variance of the residuals
variance = AR_result.sigma2

nobs = AR_result.nobs
variance, nobs = AR_result.sigma2, AR_result.nobs

return intercept, coeffs, variance, nobs

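For illustration (not part of this diff): the refactored line reads the residual variance and the number of observations directly off the statsmodels result. Below is a self-contained sketch of that underlying fit on synthetic AR(1) data; it is not mesmer's wrapper.

import numpy as np
from statsmodels.tsa.ar_model import AutoReg

rng = np.random.default_rng(0)

# synthetic AR(1) series: x_t = 0.5 * x_{t-1} + eps_t with eps_t ~ N(0, 0.2**2)
x = np.zeros(500)
for t in range(1, 500):
    x[t] = 0.5 * x[t - 1] + rng.normal(0.0, 0.2)

AR_result = AutoReg(x, lags=1).fit()

intercept = AR_result.params[0]
coeffs = AR_result.params[1:]
variance, nobs = AR_result.sigma2, AR_result.nobs  # residual variance and number of observations

print(coeffs, np.sqrt(variance))  # roughly 0.5 and roughly 0.2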
10 changes: 3 additions & 7 deletions tests/integration/test_calibrate_mesmer_newcodepath.py
@@ -262,11 +262,7 @@ def assert_params_allclose(
)
np.testing.assert_allclose(bundle["params_gv"]["AR_coefs"], global_ar_params.coeffs)
np.testing.assert_allclose(
bundle["params_gv"]["AR_std_innovs"], global_ar_params.standard_deviation
)

np.testing.assert_allclose( # this is not necessarily the same
bundle["params_gv"]["AR_std_innovs"] ** 2, global_ar_params.variance, atol=2e-5
bundle["params_gv"]["AR_var_innovs"], global_ar_params.variance
)

# local forced response
@@ -294,8 +290,8 @@ def assert_params_allclose(
bundle["params_lv"]["AR1_int"]["tas"], local_ar_params.intercept.squeeze()
)
np.testing.assert_allclose(
bundle["params_lv"]["AR1_std_innovs"]["tas"],
local_ar_params.standard_deviation.squeeze(),
bundle["params_lv"]["AR1_var_innovs"]["tas"],
local_ar_params.variance.squeeze(),
)

# covariance
10 binary files changed (not shown).
22 changes: 11 additions & 11 deletions tests/unit/test_auto_regression_scen_ens.py
@@ -1,15 +1,16 @@
import numpy as np
import pytest
import xarray as xr
from statsmodels.tsa.arima_process import ArmaProcess

import mesmer


def generate_ar_samples(ar, n_timesteps=100, n_ens=4):
def generate_ar_samples(ar, std=1, n_timesteps=100, n_ens=4):

np.random.seed(0)

data = ArmaProcess(ar, 0.1).generate_sample([n_timesteps, n_ens])
data = ArmaProcess(ar, 1).generate_sample([n_timesteps, n_ens], scale=std)

ens = np.arange(n_ens)

@@ -61,23 +62,24 @@ def test_select_ar_order_scen_ens_no_ens_dim():
xr.testing.assert_equal(result, expected)


def test_fit_auto_regression_scen_ens_one_scen():
@pytest.mark.parametrize("std", [1, 0.1, 0.5])
def test_fit_auto_regression_scen_ens_one_scen(std):

da = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=4)
n_timesteps = 100
da = generate_ar_samples([1, 0.5, 0.3, 0.4], std, n_timesteps=n_timesteps, n_ens=4)

result = mesmer.stats._fit_auto_regression_scen_ens(
da, dim="time", ens_dim="ens", lags=3
)

expected = mesmer.stats.fit_auto_regression(da, dim="time", lags=3)
expected["standard_deviation"] = np.sqrt(expected.variance)
expected = expected.mean("ens")

xr.testing.assert_equal(result, expected)
xr.testing.assert_allclose(result, expected)
np.testing.assert_allclose(np.sqrt(result.variance), std, rtol=1e-1)


def test_fit_auto_regression_scen_ens_multi_scen():

da1 = generate_ar_samples([1, 0.5, 0.3], n_timesteps=100, n_ens=4)
da2 = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=5)

@@ -88,8 +90,7 @@ def test_fit_auto_regression_scen_ens_multi_scen():
da = xr.concat([da1, da2], dim="scen")
da = da.stack(scen_ens=("scen", "ens")).dropna("scen_ens")
expected = mesmer.stats.fit_auto_regression(da, dim="time", lags=3)
expected = expected.unstack()
expected["standard_deviation"] = np.sqrt(expected.variance)
expected = expected.unstack("scen_ens")
expected = expected.mean("ens").mean("scen")

xr.testing.assert_equal(result, expected)
@@ -99,12 +100,11 @@ def test_fit_auto_regression_scen_ens_no_ens_dim():

da = generate_ar_samples([1, 0.5, 0.3, 0.4], n_timesteps=100, n_ens=4)

# simply fits each ens individually, no averaging
result = mesmer.stats._fit_auto_regression_scen_ens(
da, dim="time", ens_dim=None, lags=3
)

expected = mesmer.stats.fit_auto_regression(da, dim="time", lags=3)

expected["standard_deviation"] = np.sqrt(expected.variance)

xr.testing.assert_allclose(result, expected)

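For illustration (not part of this diff): the parametrized test relies on the scale argument of ArmaProcess.generate_sample setting the innovation standard deviation. Below is a quick stand-alone check of that assumption, with an arbitrarily chosen AR(1) coefficient of 0.5.

import numpy as np
from statsmodels.tsa.arima_process import ArmaProcess

np.random.seed(0)

std = 0.5  # innovation standard deviation passed via `scale`
data = ArmaProcess([1, -0.5], [1]).generate_sample(5000, scale=std)

# recover the innovations of x_t = 0.5 * x_{t-1} + eps_t and check their spread
residuals = data[1:] - 0.5 * data[:-1]
print(residuals.std())  # ~0.5, so a fitted AR variance should come out near std**2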