Implement Cholesky for covariance handling (#408)
* Implement Cholesky decomposition for drawing realizations from a multivariate normal distribution (see the sketch below)

* Use the Cholesky decomposition in the localized_covariance logpdf as well

* Adjust tests

* Add macOS and Windows to GitHub testing

* CHANGELOG

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
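
The idea behind the change, as a minimal standalone sketch (this is not MESMER's API; the helper name, the seed handling and the example covariance are illustrative): a multivariate normal draw with covariance C can be generated by factorizing C = L @ L.T once and transforming standard-normal noise with L.

    import numpy as np

    def draw_mvn_cholesky(covariance, n_draws, seed=0):
        # Factorize once: covariance = L @ L.T; raises np.linalg.LinAlgError
        # if the matrix is not positive definite.
        L = np.linalg.cholesky(covariance)
        rng = np.random.default_rng(seed)
        # Standard-normal noise, transformed to have the requested covariance.
        z = rng.standard_normal(size=(n_draws, covariance.shape[0]))
        return z @ L.T

    samples = draw_mvn_cholesky(np.array([[1.0, 0.5], [0.5, 2.0]]), n_draws=1000)
    print(samples.shape)  # (1000, 2)

Factorizing the covariance once and reusing the factor is what the scipy.stats.Covariance objects in the diffs below make possible for both sampling (rvs) and likelihood evaluation (logpdf).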
3 people authored Mar 12, 2024
1 parent 5de8ca9 commit 79c8ee0
Showing 10 changed files with 62 additions and 33 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/ci-workflow.yml
@@ -13,7 +13,7 @@ concurrency:

jobs:
test:
-    name: py${{ matrix.python-version }} ${{ matrix.env }}
+    name: py${{ matrix.python-version }} ${{ matrix.os }} ${{ matrix.env }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@@ -25,6 +25,12 @@ jobs:
- env: "min-all-deps"
python-version: "3.9"
os: "ubuntu-latest"
- env: ""
python-version: "3.12"
os: "macos-latest"
- env: ""
python-version: "3.12"
os: "windows-latest"
defaults:
run:
shell: bash -l {0}
3 changes: 2 additions & 1 deletion CHANGELOG.rst
@@ -11,7 +11,8 @@ New Features

Breaking changes
^^^^^^^^^^^^^^^^

+- Using Cholesky decomposition for finding covariance localization radius and drawing from the multivariate normal distribution (`#408 <https://github.com/MESMER-group/mesmer/pull/408>`_)
+  By `Victoria Bauer`_.
- The supported versions of some dependencies were changed (`#399 <https://github.com/MESMER-group/mesmer/pull/399>`_, `#405 <https://github.com/MESMER-group/mesmer/pull/405>`_):

============ ============= =========
24 changes: 20 additions & 4 deletions mesmer/stats/_auto_regression.py
@@ -1,8 +1,11 @@
+import warnings
+
import numpy as np
import pandas as pd
+import scipy
import xarray as xr

-from mesmer.core.utils import _check_dataarray_form, _check_dataset_form
+from mesmer.core.utils import LinAlgWarning, _check_dataarray_form, _check_dataset_form


def _select_ar_order_scen_ens(*objs, dim, ens_dim, maxlag, ic="bic"):
@@ -479,11 +482,24 @@ def _draw_auto_regression_correlated_np(

# NOTE: 'innovations' is the error or noise term.
# innovations has shape (n_samples, n_ts + buffer, n_coeffs)
-    innovations = np.random.multivariate_normal(
+    try:
+        cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
+    except np.linalg.LinAlgError as e:
+        if "Matrix is not positive definite" in str(e):
+            w, v = np.linalg.eigh(covariance)
+            cov = scipy.stats.Covariance.from_eigendecomposition((w, v))
+            warnings.warn(
+                "Covariance matrix is not positive definite, using eigh instead of cholesky.",
+                LinAlgWarning,
+            )
+        else:
+            raise
+
+    innovations = scipy.stats.multivariate_normal.rvs(
        mean=np.zeros(n_coeffs),
-        cov=covariance,
+        cov=cov,
        size=[n_samples, n_ts + buffer],
-    )
+    ).reshape(n_samples, n_ts + buffer, n_coeffs)

out = np.zeros([n_samples, n_ts + buffer, n_coeffs])
for t in range(ar_order + 1, n_ts + buffer):
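
The hunk above, distilled into a self-contained sketch (assuming SciPy >= 1.10, which provides scipy.stats.Covariance; the helper name is hypothetical and a plain UserWarning stands in for MESMER's LinAlgWarning): try the Cholesky route first, and fall back to an eigendecomposition when the covariance is only positive semi-definite.

    import warnings

    import numpy as np
    import scipy.stats

    def covariance_from_matrix(covariance):
        # Prefer the Cholesky factorization; fall back to an eigendecomposition
        # when the matrix is only positive semi-definite (e.g. all zeros).
        try:
            return scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
        except np.linalg.LinAlgError:
            w, v = np.linalg.eigh(covariance)
            warnings.warn("Covariance matrix is not positive definite, using eigh.")
            return scipy.stats.Covariance.from_eigendecomposition((w, v))

    cov = covariance_from_matrix(np.zeros((2, 2)))
    draws = scipy.stats.multivariate_normal.rvs(mean=np.zeros(2), cov=cov, size=5)
    print(draws.shape)  # (5, 2); all zero for this degenerate covariance

The actual implementation additionally inspects the error message so that unrelated LinAlgErrors are re-raised, as shown in the hunk above.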
6 changes: 3 additions & 3 deletions mesmer/stats/_localized_covariance.py
@@ -1,6 +1,7 @@
import warnings

import numpy as np
+import scipy
import xarray as xr

from mesmer.core.utils import (
@@ -267,12 +268,11 @@ def _get_neg_loglikelihood(data, covariance, weights):
The mean is assumed to be zero for all points.
"""

-    from scipy.stats import multivariate_normal
-
    # NOTE: 90 % of time is spent in multivariate_normal.logpdf - not much point
    # optimizing the rest

-    log_likelihood = multivariate_normal.logpdf(data, cov=covariance)
+    cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
+    log_likelihood = scipy.stats.multivariate_normal.logpdf(data, cov=cov)

# logpdf can return a scalar, which np.average does not like
log_likelihood = np.atleast_1d(log_likelihood)
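
The gain in _get_neg_loglikelihood comes from handing logpdf a pre-factorized covariance, so SciPy does not have to decompose the matrix internally. A minimal sketch of that idea (again assuming SciPy >= 1.10; the data and covariance below are made up):

    import numpy as np
    import scipy.stats

    rng = np.random.default_rng(0)
    covariance = np.array([[1.0, 0.3], [0.3, 2.0]])
    data = rng.multivariate_normal(np.zeros(2), covariance, size=100)

    # Factorize once, reuse the factor for the logpdf evaluation
    # (the mean is assumed to be zero, as in the function above).
    cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
    log_likelihood = scipy.stats.multivariate_normal.logpdf(data, cov=cov)

    # One value per sample; numerically equivalent to passing the dense matrix.
    np.testing.assert_allclose(
        log_likelihood, scipy.stats.multivariate_normal.logpdf(data, cov=covariance)
    )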
21 changes: 1 addition & 20 deletions tests/integration/test_draw_realisations_from_bundle.py
@@ -102,29 +102,10 @@ def test_make_realisations(
else:
exp = xr.open_dataset(expected_output_file)

-    # check that less than 10% of output differs. Something weird is
-    # happening with numpy's random seed (we get different values
-    # depending on the operating system) so we currently can't do any
-    # better than this.
-    _assert_frac_allclose(result, exp, rtol=1e-4, wrong_tol=0.1)
-
-    # # Ideally we would use the below, but we can't because of numpy's
-    # # random seed issue (see comment above).
-    # xr.testing.assert_allclose(result, exp, rtol=rtol)
+    xr.testing.assert_allclose(result, exp)

# make sure we can get onto a lat lon grid from what is saved
exp_reshaped = exp.set_index(z=("lat", "lon")).unstack("z")
expected_dims = {"scenario", "realisation", "lon", "lat", "year"}

assert set(exp_reshaped.dims) == expected_dims


-def _assert_frac_allclose(result, expected, rtol, wrong_tol):
-    # check that less than wrong_tol of output differs
-
-    for v in expected.data_vars:
-
-        differing_spots = ~np.isclose(result[v].values, expected[v].values, rtol=rtol)
-
-        frac_differing = differing_spots.sum() / result[v].values.size
-        assert frac_differing < wrong_tol, v
4 binary files changed (contents not shown).
33 changes: 29 additions & 4 deletions tests/unit/test_auto_regression.py
@@ -6,7 +6,7 @@
import xarray as xr

import mesmer
-from mesmer.core.utils import _check_dataarray_form, _check_dataset_form
+from mesmer.core.utils import LinAlgWarning, _check_dataarray_form, _check_dataset_form
from mesmer.testing import trend_data_1D, trend_data_2D, trend_data_3D


@@ -79,7 +79,7 @@ def ar_params_1D():

intercept = xr.DataArray(0)
coeffs = xr.DataArray([0], dims="lags")
-    variance = xr.DataArray(0)
+    variance = xr.DataArray(0.5)
ar_params = xr.Dataset(
{"intercept": intercept, "coeffs": coeffs, "variance": variance}
)
@@ -92,7 +92,7 @@ def ar_params_2D():

intercept = xr.DataArray([0, 0], dims="gridcell")
coeffs = xr.DataArray([[0, 0]], dims=("lags", "gridcell"))
-    variance = xr.DataArray([0, 0], dims="gridcell")
+    variance = xr.DataArray([0.5, 0.3], dims="gridcell")
ar_params = xr.Dataset(
{"intercept": intercept, "coeffs": coeffs, "variance": variance}
)
@@ -345,7 +345,7 @@ def test_draw_auto_regression_correlated_np_shape(ar_order, n_cells, n_samples,

intercept = np.zeros(n_cells)
coefs = np.ones((ar_order, n_cells))
-    variance = np.ones((n_cells, n_cells))
+    variance = np.eye(n_cells)

result = mesmer.stats._auto_regression._draw_auto_regression_correlated_np(
intercept=intercept,
@@ -362,6 +362,9 @@ def test_draw_auto_regression_deterministic_intercept(intercept):
assert result.shape == expected_shape


+@pytest.mark.filterwarnings(
+    "ignore:Covariance matrix is not positive definite, using eigh instead of cholesky."
+)
@pytest.mark.parametrize("intercept", [0, 1, 3.14])
def test_draw_auto_regression_deterministic_intercept(intercept):

@@ -394,6 +397,9 @@ def test_draw_auto_regression_deterministic_intercept(intercept):
np.testing.assert_equal(result, expected)


+@pytest.mark.filterwarnings(
+    "ignore:Covariance matrix is not positive definite, using eigh instead of cholesky."
+)
def test_draw_auto_regression_deterministic_coefs_buffer():

result = mesmer.stats._auto_regression._draw_auto_regression_correlated_np(
Expand Down Expand Up @@ -444,6 +450,25 @@ def test_draw_auto_regression_random():
np.testing.assert_allclose(result, expected)


+def test_draw_auto_regression_correlated_eigh():
+    # test that the function uses eigh when the covariance matrix is not positive definite
+    with pytest.warns(
+        LinAlgWarning, match="Covariance matrix is not positive definite"
+    ):
+        result = mesmer.stats._auto_regression._draw_auto_regression_correlated_np(
+            intercept=1,
+            coeffs=np.array([[0.5, 0.7], [0.3, 0.2]]),
+            covariance=np.zeros((2, 2)),
+            n_samples=1,
+            n_ts=4,
+            seed=0,
+            buffer=3,
+        )
+
+    expected = np.array([[[1.0, 1.0], [1.5, 1.7], [2.05, 2.39], [2.475, 3.013]]])
+    np.testing.assert_allclose(result, expected)


@pytest.mark.parametrize("obj", [xr.Dataset(), None])
def test_fit_auto_regression_xr_errors(obj):

