Skip to content

Commit

Permalink
Merge branch 'main' into test_draw_close
Browse files Browse the repository at this point in the history
  • Loading branch information
veni-vidi-vici-dormivi committed Mar 8, 2024
2 parents 4a8dcf4 + e09a49b commit 9748df7
Show file tree
Hide file tree
Showing 9 changed files with 975 additions and 142 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pypi-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
path: dist
- name: Publish package to TestPyPI
if: github.event_name == 'push'
uses: pypa/gh-action-pypi-publish@v1.8.11
uses: pypa/gh-action-pypi-publish@v1.8.12
with:
repository_url: https://test.pypi.org/legacy/
verbose: true
Expand All @@ -111,6 +111,6 @@ jobs:
name: releases
path: dist
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@v1.8.11
uses: pypa/gh-action-pypi-publish@v1.8.12
with:
verbose: true
4 changes: 2 additions & 2 deletions mesmer/stats/_auto_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def select_ar_order(data, dim, maxlag, ic="bic"):
)

# remove zeros
selected_ar_order.data[selected_ar_order.data == 0] = np.NaN
selected_ar_order.data[selected_ar_order.data == 0] = np.nan

selected_ar_order.name = "selected_ar_order"

Expand Down Expand Up @@ -180,7 +180,7 @@ def _select_ar_order_np(data, maxlag, ic="bic"):
ar_lags = ar_select_order(data, maxlag=maxlag, ic=ic).ar_lags

# None is returned if no lag is selected
selected_ar_order = np.NaN if ar_lags is None else ar_lags[-1]
selected_ar_order = np.nan if ar_lags is None else ar_lags[-1]

return selected_ar_order

Expand Down
114 changes: 114 additions & 0 deletions mesmer_m/Untitled.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "30a205aa",
"metadata": {},
"outputs": [],
"source": [
"import tests_harmonic_model as thm\n",
"import importlib\n",
"importlib.reload(thm)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d888e6c",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"n_sel,mse,y_acts, y_preds = thm.BIC_gridsearch_domain(4,1)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9393ec1a",
"metadata": {},
"outputs": [],
"source": [
"n_sel.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88d1f77c",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"plt.figure(figsize=(10,7))\n",
"\n",
"for n_run in range(1):\n",
"\n",
" plt.scatter(np.arange(1,5),n_sel[:,n_run])\n",
"\n",
"plt.plot(np.arange(1,5), np.arange(1,5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7eea7caf",
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(10,7))\n",
"\n",
"for n_run in range(30):\n",
"\n",
" plt.scatter(np.arange(1,11),mse[:,n_run,6])\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "777de5a8",
"metadata": {},
"outputs": [],
"source": [
"plt.plot(np.arange(120), y_acts[1][4,-120:\n",
" ])\n",
"plt.plot(np.arange(120), y_preds[1][4,-120:])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c7b8d8e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
247 changes: 247 additions & 0 deletions mesmer_m/harmonic_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
# MESMER-M, land-climate dynamics group, S.I. Seneviratne
# Copyright (c) 2021 ETH Zurich, MESMER contributors listed in AUTHORS.
# Licensed under the GNU General Public License v3.0 or later see LICENSE or
# https://www.gnu.org/licenses/

"""
Functions to train monthly trend module of MESMER-M
"""

import numpy as np
import xarray as xr
from scipy import optimize


def generate_fourier_series_np(coeffs, n, x, mon):
    """Evaluate the harmonic model (Fourier series) for given coefficients.

    Every order ``i`` in ``0 .. n - 1`` contributes::

        (a_i * x + b_i) * sin(theta_i) + (c_i * x + d_i) * cos(theta_i)

    with ``theta_i = pi * i * (mon % 12 + 1) / 6`` and
    ``a_i, b_i, c_i, d_i = coeffs[4 * i : 4 * i + 4]``.

    Parameters
    ----------
    coeffs : array-like
        Coefficients of the Fourier series. Entries ``0 .. 4 * n - 1`` are
        read. For order 0 the sine factor is zero, so ``coeffs[0]`` and
        ``coeffs[1]`` have no influence on the result.
    n : int
        Number of orders that are summed up.
    x : array-like of shape (n_samples,)
        Yearly temperature values.
    mon : array-like of shape (n_samples,)
        Month values (0-11).

    Returns
    -------
    predictions : array-like of shape (n_samples,)
        Fourier series of order n calculated over x and mon with coeffs.
    """
    series = 0

    for order in range(n):
        # the four coefficients of one order are stored next to each other
        base = 4 * order

        # angle of this harmonic; identical for the sine and the cosine term
        angle = np.pi * order * (mon % 12 + 1) / 6

        sine_term = (coeffs[base] * x + coeffs[base + 1]) * np.sin(angle)
        cosine_term = (coeffs[base + 2] * x + coeffs[base + 3]) * np.cos(angle)

        series = series + (sine_term + cosine_term)

    return series


def fit_fourier_series_np(x, y, n, repeat=False):
    """Fit the coefficients of the harmonic model (Fourier series) of order n.

    Parameters
    ----------
    x : array-like of shape (n_samples,) or (n_samples / 12,)
        Yearly temperature values to predict with. Has to be at monthly
        resolution already (each yearly value repeated 12 times) unless
        ``repeat=True``.
    y : array-like of shape (n_samples,)
        Target monthly temperature values.
    n : int
        Order of the Fourier series, expected to be >= 1.
    repeat : bool, default: False
        Whether x has to be expanded, i.e. every value repeated 12 times.

    Returns
    -------
    coeffs : np.ndarray of shape (4 * n,)
        Fitted coefficients of the Fourier series. The first two entries
        belong to the sine term of order 0, which is identically zero, so
        they stay at their initial value.
    y_pred : np.ndarray of shape (n_samples,)
        Predicted monthly temperature values.

    Raises
    ------
    ValueError
        If the number of monthly predictor values is not a multiple of 12.

    Notes
    -----
    The fitted model is::

        sum_{i=0}^{n-1} (a_i * x + b_i) * sin(pi * i * m / 6)
                      + (c_i * x + d_i) * cos(pi * i * m / 6)

    with ``m = mon % 12 + 1``, evaluated by ``generate_fourier_series_np``.
    The coefficients are estimated with ``scipy.optimize.least_squares``
    using the robust "cauchy" loss. The samples are assumed to consist of
    complete years in calendar order.
    """

    x_train = np.asarray(x)
    if repeat:
        # each month scales with the yearly value of its year
        x_train = np.repeat(x_train, 12)

    y = np.asarray(y)

    n_months = x_train.shape[0]
    if n_months % 12 != 0:
        raise ValueError(
            "Expected a multiple of 12 monthly predictor values, "
            f"got {n_months}"
        )

    # Plain month indices (0-11): generate_fourier_series_np converts them to
    # angles itself, so they must not be converted a second time here.
    mon_train = np.tile(np.arange(12), n_months // 12)

    def residuals(coeffs, n, x_train, mon_train, y):
        """residuals of the Fourier series for scipy.optimize.least_squares"""
        # least_squares expects the vector of residuals, not an aggregated
        # loss - it builds the (robust) cost function from them itself
        return generate_fourier_series_np(coeffs, n, x_train, mon_train) - y

    # first guess: monthly values equal the yearly value (c_0 = 1), all other
    # coefficients are zero
    x0 = np.zeros(n * 4)
    x0[2] = 1

    # NOTE: the original author remarked that a np.linalg.lstsq based solution
    # seemed to select fewer orders than the scipy one
    coeffs = optimize.least_squares(
        residuals, x0, args=(n, x_train, mon_train, y), loss="cauchy"
    ).x

    y_pred = generate_fourier_series_np(coeffs, n, x_train, mon_train)

    return coeffs, y_pred


def calculate_bic(n_samples, n_order, mse):
    """Compute the Bayesian Information Criterion (BIC) of a fitted series.

    The score is ``n_samples * log(mse) + n_params * log(n_samples)`` where
    ``n_params = 4 * n_order - 2``.

    Parameters
    ----------
    n_samples : int
        Size of the training set.
    n_order : int
        Order of the Fourier series.
    mse : float
        Mean squared error of the fit.

    Returns
    -------
    bic : float
        The BIC score.
    """

    # goodness-of-fit part of the score
    fit_term = n_samples * np.log(mse)

    # penalty for the number of coefficients of a series of this order
    penalty_term = (n_order * 4 - 2) * np.log(n_samples)

    return fit_term + penalty_term


def fit_to_bic_np(x, y, max_n, repeat=False):
    """Choose the order of the Fourier series by minimising the BIC score.

    A Fourier series is fitted for every order from 1 to ``max_n``; the order
    with the smallest BIC score is selected.

    Parameters
    ----------
    x : array-like
        Yearly temperature values to predict with, passed on to
        ``fit_fourier_series_np``.
    y : array-like of shape (n_samples,)
        Target monthly temperature values.
    max_n : int
        Maximum order of the Fourier series, has to be >= 1.
    repeat : bool, default: False
        Passed on to ``fit_fourier_series_np``.

    Returns
    -------
    n_sel : int
        Selected order of the Fourier series.
    coeffs : np.ndarray of shape (4 * max_n,)
        Fitted coefficients of the selected order, padded with zeros at the
        end so that the length does not depend on the selected order.
    y_pred : array-like of shape (n_samples,)
        Predicted y values of the selected model.

    Raises
    ------
    ValueError
        If ``max_n`` is smaller than 1.
    """

    if max_n < 1:
        raise ValueError(f"`max_n` must be at least 1, got {max_n}")

    n_samples = len(y)
    bic_score = np.zeros(max_n)

    # keep every fit so the selected order does not have to be fitted again
    fits = []

    for i_n in range(1, max_n + 1):
        coeffs_fit, y_pred = fit_fourier_series_np(x, y, i_n, repeat=repeat)
        mse = np.mean((y_pred - y) ** 2)

        bic_score[i_n - 1] = calculate_bic(n_samples, i_n, mse)
        fits.append((coeffs_fit, y_pred))

    # orders start at 1 while positions start at 0
    n_sel = np.argmin(bic_score) + 1
    coeffs_fit, y_pred = fits[n_sel - 1]

    # A fit of order n yields 4 * n coefficients, so the buffer needs
    # 4 * max_n entries; with 4 * max_n - 2 the assignment below fails as
    # soon as the highest order is selected.
    coeffs = np.zeros(max_n * 4)
    coeffs[: len(coeffs_fit)] = coeffs_fit

    return n_sel, coeffs, y_pred


def fit_to_bic_xr(X, Y, max_n):
    """Fit a Fourier series with BIC based order selection - xarray wrapper.

    Applies ``fit_to_bic_np`` along the "time" dimension of X and Y and
    vectorizes over all remaining dimensions.

    Parameters
    ----------
    X : xr.DataArray
        Yearly temperature values used as predictors, must contain the
        dimension "time". The values are used as given (``repeat=False``).
    Y : xr.DataArray
        Monthly temperature values to fit for, must contain the dimension
        "time".
    max_n : int
        Maximum order of the Fourier series to fit for.

    Returns
    -------
    data_vars : xr.Dataset
        Dataset containing the selected order of the Fourier series
        ("n_sel"), the estimated coefficients along the new dimension "coeff"
        ("coeffs") and the resulting predictions for the monthly temperatures
        along "time" ("predictions").

    Raises
    ------
    TypeError
        If X or Y is not a ``xr.DataArray``.
    """

    # X is checked before Y
    for candidate in (X, Y):
        if not isinstance(candidate, xr.DataArray):
            raise TypeError(f"Expected a `xr.DataArray`, got {type(candidate)}")

    fit_options = {"max_n": max_n, "repeat": False}

    selected_order, coefficients, predictions = xr.apply_ufunc(
        fit_to_bic_np,
        X,
        Y,
        input_core_dims=[["time"], ["time"]],
        output_core_dims=([], ["coeff"], ["time"]),
        vectorize=True,
        output_dtypes=[int, float, float],
        kwargs=fit_options,
    )

    return xr.Dataset(
        {
            "n_sel": selected_order,
            "coeffs": coefficients,
            "predictions": predictions,
        }
    )
Loading

0 comments on commit 9748df7

Please sign in to comment.