Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Cholesky for covariance handling #408

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e437ce2
test if drawing realizations is close
veni-vidi-vici-dormivi Feb 28, 2024
51f9ec5
add macOS and Windows to GitHub testing
veni-vidi-vici-dormivi Feb 28, 2024
159a695
Merge branch 'main' into test_draw_close
veni-vidi-vici-dormivi Feb 29, 2024
7bae77d
add OS to test job name
veni-vidi-vici-dormivi Feb 29, 2024
508cd92
test with new (macos created) dataset
veni-vidi-vici-dormivi Mar 1, 2024
a66d9c6
test with macos 14
veni-vidi-vici-dormivi Mar 4, 2024
b8b1232
print max difference between old and new dataset
veni-vidi-vici-dormivi Mar 4, 2024
2c70de4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 4, 2024
5acaded
very dirty first try to reuse A for drawing innovations
veni-vidi-vici-dormivi Mar 5, 2024
14ff890
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 5, 2024
f41ef13
revert implementation of A and home baked multivariate normal
veni-vidi-vici-dormivi Mar 7, 2024
0ce6c23
Actually revert "very dirty first try to reuse A for drawing innovati…
veni-vidi-vici-dormivi Mar 7, 2024
e2a176a
fix dimensions of innovations
veni-vidi-vici-dormivi Mar 7, 2024
aa75eec
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 7, 2024
21f0b9e
implement cholesky for drawing realizations
veni-vidi-vici-dormivi Mar 7, 2024
c9954bb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 7, 2024
f813882
adjust tests
veni-vidi-vici-dormivi Mar 8, 2024
d7426cc
add test for eigh
veni-vidi-vici-dormivi Mar 8, 2024
02bf602
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 8, 2024
871a32e
fix test
veni-vidi-vici-dormivi Mar 8, 2024
4a8dcf4
change mac os back to latest
veni-vidi-vici-dormivi Mar 8, 2024
9748df7
Merge branch 'main' into test_draw_close
veni-vidi-vici-dormivi Mar 8, 2024
c5fb678
Remove comment for default driver in eigh
veni-vidi-vici-dormivi Mar 8, 2024
02ba682
only run test for windows and macos for python 3.12
veni-vidi-vici-dormivi Mar 8, 2024
c496ef5
change warning on eigh to LinAlgWarning
veni-vidi-vici-dormivi Mar 8, 2024
4a6341e
change test_draw to only use assert_close
veni-vidi-vici-dormivi Mar 8, 2024
2c4cc90
use cholesky also in localized_covariance logpdf
veni-vidi-vici-dormivi Mar 11, 2024
ecb6103
CHANGELOG
veni-vidi-vici-dormivi Mar 11, 2024
a486a8e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ concurrency:

jobs:
test:
name: py${{ matrix.python-version }} ${{ matrix.env }}
name: py${{ matrix.python-version }} ${{ matrix.os }} ${{ matrix.env }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
Expand All @@ -25,6 +25,12 @@ jobs:
- env: "min-all-deps"
python-version: "3.9"
os: "ubuntu-latest"
- env: ""
python-version: "3.12"
os: "macos-latest"
- env: ""
python-version: "3.12"
os: "windows-latest"
defaults:
run:
shell: bash -l {0}
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ New Features

Breaking changes
^^^^^^^^^^^^^^^^

- Using Cholesky decomposition both for finding the covariance localization radius and for drawing from the multivariate normal distribution (`#408 <https://github.com/MESMER-group/mesmer/pull/408>`_)
By `Victoria Bauer`_.
- The supported versions of some dependencies were changed (`#399 <https://github.com/MESMER-group/mesmer/pull/399>`_, `#405 <https://github.com/MESMER-group/mesmer/pull/405>`_):

============ ============= =========
Expand Down
24 changes: 20 additions & 4 deletions mesmer/stats/_auto_regression.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import warnings

import numpy as np
import pandas as pd
import scipy
import xarray as xr

from mesmer.core.utils import _check_dataarray_form, _check_dataset_form
from mesmer.core.utils import LinAlgWarning, _check_dataarray_form, _check_dataset_form


def _select_ar_order_scen_ens(*objs, dim, ens_dim, maxlag, ic="bic"):
Expand Down Expand Up @@ -479,11 +482,24 @@ def _draw_auto_regression_correlated_np(

# NOTE: 'innovations' is the error or noise term.
# innovations has shape (n_samples, n_ts + buffer, n_coeffs)
innovations = np.random.multivariate_normal(
try:
cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
except np.linalg.LinAlgError as e:
if "Matrix is not positive definite" in str(e):
w, v = np.linalg.eigh(covariance)
cov = scipy.stats.Covariance.from_eigendecomposition((w, v))
warnings.warn(
veni-vidi-vici-dormivi marked this conversation as resolved.
Show resolved Hide resolved
"Covariance matrix is not positive definite, using eigh instead of cholesky.",
LinAlgWarning,
)
else:
raise

innovations = scipy.stats.multivariate_normal.rvs(
mean=np.zeros(n_coeffs),
cov=covariance,
cov=cov,
size=[n_samples, n_ts + buffer],
)
).reshape(n_samples, n_ts + buffer, n_coeffs)

out = np.zeros([n_samples, n_ts + buffer, n_coeffs])
for t in range(ar_order + 1, n_ts + buffer):
Expand Down
6 changes: 3 additions & 3 deletions mesmer/stats/_localized_covariance.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import warnings

import numpy as np
import scipy
import xarray as xr

from mesmer.core.utils import (
Expand Down Expand Up @@ -267,12 +268,11 @@ def _get_neg_loglikelihood(data, covariance, weights):
The mean is assumed to be zero for all points.
"""

from scipy.stats import multivariate_normal

# NOTE: 90 % of time is spent in multivariate_normal.logpdf - not much point
# optimizing the rest

log_likelihood = multivariate_normal.logpdf(data, cov=covariance)
cov = scipy.stats.Covariance.from_cholesky(np.linalg.cholesky(covariance))
log_likelihood = scipy.stats.multivariate_normal.logpdf(data, cov=cov)

# logpdf can return a scalar, which np.average does not like
log_likelihood = np.atleast_1d(log_likelihood)
Expand Down
21 changes: 1 addition & 20 deletions tests/integration/test_draw_realisations_from_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,29 +102,10 @@ def test_make_realisations(
else:
exp = xr.open_dataset(expected_output_file)

# check that less than 10% of output differs. Something weird is
# happening with numpy's random seed (we get different values
# depending on the operating system) so we currently can't do any
# better than this.
_assert_frac_allclose(result, exp, rtol=1e-4, wrong_tol=0.1)

# # Ideally we would use the below, but we can't because of numpy's
# # random seed issue (see comment above).
# xr.testing.assert_allclose(result, exp, rtol=rtol)
xr.testing.assert_allclose(result, exp)

# make sure we can get onto a lat lon grid from what is saved
exp_reshaped = exp.set_index(z=("lat", "lon")).unstack("z")
expected_dims = {"scenario", "realisation", "lon", "lat", "year"}

assert set(exp_reshaped.dims) == expected_dims


def _assert_frac_allclose(result, expected, rtol, wrong_tol):
# Check that less than wrong_tol of output differs.
# For every variable in expected.data_vars, the fraction of elements of
# result that np.isclose (with relative tolerance rtol) reports as differing
# from expected must be strictly below wrong_tol; otherwise the assertion
# fails and reports the name of the offending variable.

for v in expected.data_vars:

# boolean mask that is True where result and expected are NOT close
differing_spots = ~np.isclose(result[v].values, expected[v].values, rtol=rtol)

# number of differing elements divided by the total number of elements
frac_differing = differing_spots.sum() / result[v].values.size
assert frac_differing < wrong_tol, v
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
33 changes: 29 additions & 4 deletions tests/unit/test_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import xarray as xr

import mesmer
from mesmer.core.utils import _check_dataarray_form, _check_dataset_form
from mesmer.core.utils import LinAlgWarning, _check_dataarray_form, _check_dataset_form
from mesmer.testing import trend_data_1D, trend_data_2D, trend_data_3D


Expand Down Expand Up @@ -79,7 +79,7 @@ def ar_params_1D():

intercept = xr.DataArray(0)
coeffs = xr.DataArray([0], dims="lags")
variance = xr.DataArray(0)
variance = xr.DataArray(0.5)
ar_params = xr.Dataset(
{"intercept": intercept, "coeffs": coeffs, "variance": variance}
)
Expand All @@ -92,7 +92,7 @@ def ar_params_2D():

intercept = xr.DataArray([0, 0], dims="gridcell")
coeffs = xr.DataArray([[0, 0]], dims=("lags", "gridcell"))
variance = xr.DataArray([0, 0], dims="gridcell")
variance = xr.DataArray([0.5, 0.3], dims="gridcell")
ar_params = xr.Dataset(
{"intercept": intercept, "coeffs": coeffs, "variance": variance}
)
Expand Down Expand Up @@ -345,7 +345,7 @@ def test_draw_auto_regression_correlated_np_shape(ar_order, n_cells, n_samples,

intercept = np.zeros(n_cells)
coefs = np.ones((ar_order, n_cells))
variance = np.ones((n_cells, n_cells))
variance = np.eye(n_cells)

result = mesmer.stats._auto_regression._draw_auto_regression_correlated_np(
intercept=intercept,
Expand All @@ -362,6 +362,9 @@ def test_draw_auto_regression_correlated_np_shape(ar_order, n_cells, n_samples,
assert result.shape == expected_shape


@pytest.mark.filterwarnings(
"ignore:Covariance matrix is not positive definite, using eigh instead of cholesky."
)
@pytest.mark.parametrize("intercept", [0, 1, 3.14])
def test_draw_auto_regression_deterministic_intercept(intercept):

Expand Down Expand Up @@ -394,6 +397,9 @@ def test_draw_auto_regression_deterministic_intercept(intercept):
np.testing.assert_equal(result, expected)


@pytest.mark.filterwarnings(
"ignore:Covariance matrix is not positive definite, using eigh instead of cholesky."
)
def test_draw_auto_regression_deterministic_coefs_buffer():

result = mesmer.stats._auto_regression._draw_auto_regression_correlated_np(
Expand Down Expand Up @@ -444,6 +450,25 @@ def test_draw_auto_regression_random():
np.testing.assert_allclose(result, expected)


def test_draw_auto_regression_correlated_eigh():
# test that the function uses eigh when the covariance matrix is not positive definite
# The fallback is detected through the LinAlgWarning that is expected to be
# emitted when the Cholesky decomposition is not possible.
with pytest.warns(
LinAlgWarning, match="Covariance matrix is not positive definite"
):
result = mesmer.stats._auto_regression._draw_auto_regression_correlated_np(
intercept=1,
coeffs=np.array([[0.5, 0.7], [0.3, 0.2]]),
# an all-zero covariance matrix is not positive definite
covariance=np.zeros((2, 2)),
n_samples=1,
n_ts=4,
seed=0,
buffer=3,
)

# NOTE(review): the expected values look like the noise-free AR(2) recursion
# out[t] = intercept + sum(coeffs[lag] * out[t - lag]) - confirm against the
# implementation.
expected = np.array([[[1.0, 1.0], [1.5, 1.7], [2.05, 2.39], [2.475, 3.013]]])
np.testing.assert_allclose(result, expected)


@pytest.mark.parametrize("obj", [xr.Dataset(), None])
def test_fit_auto_regression_xr_errors(obj):

Expand Down