diff --git a/docs/api/interval.md b/docs/api/interval.md
index 6b546ea..afe6d10 100644
--- a/docs/api/interval.md
+++ b/docs/api/interval.md
@@ -1,34 +1,5 @@
 # Interval Score
 
-## Interval or Winkler Score
-
-For a prediction interval (PI), the interval or Winkler score is given by:
-
-$$
-\text{IS} = \begin{cases}
-(u - l) + \frac{2}{\alpha}(l - y) & \text{for } y < l \\
-(u - l) & \text{for } l \leq y \leq u \\
-(u - l) + \frac{2}{\alpha}(y - u) & \text{for } y > u. \\
-\end{cases}
-$$
-
-for an $(1 - \alpha)$PI of $[l, u]$ and the true value $y$ [@gneiting_strictly_2007, @bracher2021evaluating @winkler1972decision].
-
-## Weighted Interval Score
-
-The weighted interval score (WIS) is defined as
-
-$$
-\text{WIS}_{\alpha_{0:K}}(F, y) = \frac{1}{K+0.5}(w_0 \times |y - m| + \sum_{k=1}^K (w_k \times IS_{\alpha_k}(F, y)))
-$$
-
-where $m$ denotes the median prediction, $w_0$ denotes the weight of the median prediction, $IS_{\alpha_k}(F, y)$ denotes the interval score for the $1 - \alpha$ prediction interval and $w_k$ is the according weight. The WIS is calculated for a set of (central) PIs and the predictive median [@bracher2021evaluating]. The weights are an optional parameter and default weight is the canonical weight $w_k = \frac{2}{\alpha_k}$ and $w_0 = 0.5$. For these weights, it holds that:
-
-$$
-\text{WIS}_{\alpha_{0:K}}(F, y) \approx \text{CRPS}(F, y).
-$$
-
-
 ::: scoringrules.interval_score
 
 ::: scoringrules.weighted_interval_score
diff --git a/pyproject.toml b/pyproject.toml
index 77ad759..3a612a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,9 +58,13 @@ dev-dependencies = [
 ]
 
 [tool.ruff]
+line-length = 88
 
 [tool.ruff.lint]
 ignore = ["E741"]
 
 [tool.coverage.run]
 omit = ["**/_gufuncs.py", "**/_gufunc.py"]
+
+[tool.coverage.report]
+exclude_also = ["if tp.TYPE_CHECKING:"]
diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py
index 2ec277a..4846884 100644
--- a/scoringrules/_crps.py
+++ b/scoringrules/_crps.py
@@ -58,7 +58,12 @@ def crps_ensemble(
     if axis != -1:
         forecasts = B.moveaxis(forecasts, axis, -1)
 
-    if not sorted_ensemble and estimator not in ["nrg", "akr", "akr_circperm", "fair"]:
+    if not sorted_ensemble and estimator not in [
+        "nrg",
+        "akr",
+        "akr_circperm",
+        "fair",
+    ]:
         forecasts = B.sort(forecasts, axis=-1)
 
     if backend == "numba":
@@ -865,7 +870,14 @@ def crps_gtclogistic(
     >>> sr.crps_gtclogistic(0.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
     """
     return crps.gtclogistic(
-        observation, location, scale, lower, upper, lmass, umass, backend=backend
+        observation,
+        location,
+        scale,
+        lower,
+        upper,
+        lmass,
+        umass,
+        backend=backend,
     )
 
 
@@ -953,7 +965,14 @@ def crps_clogistic(
     lmass = stats._logis_cdf((lower - location) / scale)
     umass = 1 - stats._logis_cdf((upper - location) / scale)
     return crps.gtclogistic(
-        observation, location, scale, lower, upper, lmass, umass, backend=backend
+        observation,
+        location,
+        scale,
+        lower,
+        upper,
+        lmass,
+        umass,
+        backend=backend,
     )
 
 
@@ -990,7 +1009,14 @@ def crps_gtcnormal(
     >>> sr.crps_gtcnormal(0.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
     """
     return crps.gtcnormal(
-        observation, location, scale, lower, upper, lmass, umass, backend=backend
+        observation,
+        location,
+        scale,
+        lower,
+        upper,
+        lmass,
+        umass,
+        backend=backend,
     )
 
 
@@ -1078,7 +1104,14 @@ def crps_cnormal(
     lmass = stats._norm_cdf((lower - location) / scale)
     umass = 1 - stats._norm_cdf((upper - location) / scale)
     return crps.gtcnormal(
-        observation, location, scale, lower, upper, lmass, umass, backend=backend
+        observation,
+        location,
+        scale,
+        lower,
+        upper,
+        lmass,
+        umass,
+        backend=backend,
     )
 
 
@@ -1146,7 +1179,15 @@ def crps_gtct(
     >>> sr.crps_gtct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
     """
     return crps.gtct(
-        observation, df, location, scale, lower, upper, lmass, umass, backend=backend
+        observation,
+        df,
+        location,
+        scale,
+        lower,
+        upper,
+        lmass,
+        umass,
+        backend=backend,
     )
 
 
@@ -1192,7 +1233,15 @@ def crps_tt(
    >>> sr.crps_tt(0.0, 2.0, 0.1, 0.4, -1.0, 1.0)
     """
     return crps.gtct(
-        observation, df, location, scale, lower, upper, 0.0, 0.0, backend=backend
+        observation,
+        df,
+        location,
+        scale,
+        lower,
+        upper,
+        0.0,
+        0.0,
+        backend=backend,
     )
 
 
@@ -1240,7 +1289,15 @@ def crps_ct(
     lmass = stats._t_cdf((lower - location) / scale, df)
     umass = 1 - stats._t_cdf((upper - location) / scale, df)
     return crps.gtct(
-        observation, df, location, scale, lower, upper, lmass, umass, backend=backend
+        observation,
+        df,
+        location,
+        scale,
+        lower,
+        upper,
+        lmass,
+        umass,
+        backend=backend,
     )
diff --git a/scoringrules/_interval.py b/scoringrules/_interval.py
index 4f8a530..749f21d 100644
--- a/scoringrules/_interval.py
+++ b/scoringrules/_interval.py
@@ -1,5 +1,5 @@
 import typing as tp
-from typing import Optional, Union
+from typing import Optional
 
 from scoringrules.backend import backends
 from scoringrules.core import interval
@@ -9,162 +9,177 @@
 
 
 def interval_score(
-    observations: "ArrayLike",
-    lower: "Array",
-    upper: "Array",
-    alpha: Union[float, "Array"],
-    /,
-    axis: int = -1,
+    obs: "ArrayLike",
+    lower: "ArrayLike",
+    upper: "ArrayLike",
+    alpha: "ArrayLike",
     *,
     backend: "Backend" = None,
 ) -> "Array":
-    r"""Compute the Interval Score or Winkler Score [(Gneiting & Raftery, 2012)](https://www.tandfonline.com/doi/abs/10.1198/016214506000001437) for 1 - $\alpha$ prediction intervals PI = [lower, upper].
-
-    The interval score is defined as
+    r"""Compute the Interval Score or Winkler Score.
 
-    $\text{IS} = \begin{cases}
-    (u - l) + \frac{2}{\alpha}(l - y) & \text{for } y < l \\
-    (u - l) & \text{for } l \leq y \leq u \\
-    (u - l) + \frac{2}{\alpha}(y - u) & \text{for } y > u. \\
-    \end{cases}$
+    The interval score
+    [(Gneiting & Raftery, 2007)](https://doi.org/10.1198/016214506000001437)
+    is defined as
 
-    for an $1 - \alpha$ PI of $[l, u]$ and the true value $y$.
+    $$
+    \text{IS} =
+    \begin{cases}
+    (u - l) + \frac{2}{\alpha}(l - y) & \text{for } y < l \\
+    (u - l) & \text{for } l \leq y \leq u \\
+    (u - l) + \frac{2}{\alpha}(y - u) & \text{for } y > u. \\
+    \end{cases}
+    $$
 
-    Note
-    ----
-    Note that alpha can be a float or an array of coverages.
-    In the case alpha is a float, the output will have the same shape as the observations and we assume that shape of observations,
-    upper and lower is the same. In case alpha is a vector, the function will broadcast observations accordingly.
+    for a $1 - \alpha$ prediction interval of $[l, u]$ and the true value $y$.
 
     Parameters
     ----------
-    observations: ArrayLike
-        The observed values.
-    lower: Array
-        The predicted lower bound of the prediction interval.
-    upper: Array
-        The predicted upper bound of the prediction interval.
-    alpha: Union[float, Array]
-        The 1 - alpha level for the prediction interval.
-    axis: int
-        The axis corresponding to the ensemble. Default is the last axis.
-    backend: str
+    obs:
+        The observations as a scalar or array of values.
+    lower:
+        The predicted lower bound of the PI as a scalar or array of values.
+    upper:
+        The predicted upper bound of the PI as a scalar or array of values.
+    alpha:
+        The 1 - alpha level for the PI as a scalar or array of values.
+    backend:
         The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
 
     Returns
     -------
-    score: Array
-        An array of interval scores for each prediction interval, which should be averaged to get meaningful values.
+    score:
+        Array with the interval score for the input values.
+
+    Raises
+    ------
+    ValueError:
+        If the lower and upper bounds do not have the same
+        shape or if the number of PIs does not match the number of alpha levels.
+
+    Notes
+    -----
+    Given an `obs` array of shape `(...,)`: when multiple PIs are evaluated,
+    `alpha` is an array of shape `(K,)`, `lower` and `upper` must have shape
+    `(...,K)`, and the output will have shape `(...,K)`.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>> sr.interval_score(0.1, 0.0, 0.4, 0.5)
+    0.4
+
+    >>> sr.interval_score(
+    ...     obs=np.array([0.1, 0.2, 0.3]),
+    ...     lower=np.array([0.0, 0.1, 0.2]),
+    ...     upper=np.array([0.4, 0.3, 0.5]),
+    ...     alpha=0.5,
+    ... )
+    array([0.4, 0.2, 0.3])
+
+    >>> sr.interval_score(
+    ...     obs=np.random.uniform(size=(10,)),
+    ...     lower=np.ones((10,5)) * 0.2,
+    ...     upper=np.ones((10,5)) * 0.8,
+    ...     alpha=np.linspace(0.1, 0.9, 5),
+    ... ).shape
+    (10, 5)
     """
     B = backends.active if backend is None else backends[backend]
 
-    single_alpha = isinstance(alpha, float)
-
-    observations, lower, upper = map(B.asarray, (observations, lower, upper))
-
-    if axis != -1:
-        lower = B.moveaxis(lower, axis, -1)
-        upper = B.moveaxis(upper, axis, -1)
-
-    if single_alpha:
-        if B.name == "numba":
-            return interval._interval_score_gufunc(observations, lower, upper, alpha)
+    obs, lower, upper, alpha = map(B.asarray, (obs, lower, upper, alpha))
 
-        return interval._interval_score(
-            observations, lower, upper, alpha, backend=backend
+    if lower.shape != upper.shape:
+        raise ValueError(
+            "The lower and upper bounds must have the same shape."
+            f" Got lower {lower.shape} and upper {upper.shape}."
         )
-    else:
-        alpha = B.asarray(alpha)
-
-        if B.name == "numba":
-            return interval._interval_score_gufunc(
-                observations[..., None], lower, upper, alpha
+    if alpha.ndim == 1:
+        obs = obs[..., None]
+        if (lower.shape[-1] != alpha.shape[0]) or (upper.shape[-1] != alpha.shape[0]):
+            raise ValueError(
+                "The number of PIs does not match the number of alpha levels."
+                f" Got lower and upper of shape {lower.shape}"
+                f" for alpha of shape {alpha.shape}."
             )
 
-        return interval._interval_score(
-            observations[..., None], lower, upper, alpha, backend=backend
-        )
+    res = interval.interval_score(obs, lower, upper, alpha)
+    return B.squeeze(res)
 
 
 def weighted_interval_score(
-    observations: "ArrayLike",
+    obs: "ArrayLike",
     median: "Array",
     lower: "Array",
     upper: "Array",
     alpha: "Array",
     /,
-    weight_median: Optional[float] = None,
-    weight_alpha: Optional["Array"] = None,
-    axis: int = -1,
+    w_median: Optional[float] = None,
+    w_alpha: Optional["Array"] = None,
     *,
     backend: "Backend" = None,
 ) -> "Array":
-    r"""Compute the Interval Score or Winkler Score [(Bracher J, Ray EL, Gneiting T, Reich NG, 2022)](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1008618) for 1 - $\alpha$ prediction intervals PI = [lower, upper].
+    r"""Compute the weighted interval score (WIS).
+
+    The WIS [(Bracher et al., 2021)](https://doi.org/10.1371/journal.pcbi.1008618)
+    is defined as
 
-    The weighted interval score (WIS) is defined as
+    $$
+    \text{WIS}_{\alpha_{0:K}}(F, y) = \frac{1}{K+0.5}(w_0 \times |y - m|
+    + \sum_{k=1}^K (w_k \times IS_{\alpha_k}(F, y)))
+    $$
 
-    $\text{WIS}_{\alpha_{0:K}}(F, y) = \frac{1}{K+0.5}(w_0 \times |y - m| + \sum_{k=1}^K (w_k \times IS_{\alpha_k}(F, y)))$
+    where $m$ denotes the median prediction, $w_0$ denotes the weight of the
+    median prediction, $IS_{\alpha_k}(F, y)$ denotes the interval score for the
+    $1 - \alpha_k$ prediction interval, and $w_k$ is the corresponding weight.
+    The WIS is calculated for a set of (central) PIs and the predictive median.
+    The weights are optional; the defaults are the canonical weights
+    $w_k = \frac{\alpha_k}{2}$ and $w_0 = 0.5$.
+    For these weights, it holds that:
 
-    where $m$ denotes the median prediction, $w_0$ denotes the weight of the median prediction,
-    $IS_{\alpha_k}(F, y)$ denotes the interval score for the $1 - \alpha$ prediction interval and
-    $w_k$ is the according weight. The WIS is calculated for a set of (central) PIs and the predictive
-    median. The weights are an optional parameter and default weight
-    is the canonical weight $w_k = \frac{2}{\alpha_k}$ and $w_0 = 0.5$. Using the canonical weights, the WIS
-    can be used to approximate the CRPS.
+    $$
+    \text{WIS}_{\alpha_{0:K}}(F, y) \approx \text{CRPS}(F, y).
+    $$
 
     Parameters
     ----------
-    observations: ArrayLike
-        The observed values.
-    median: Array
-        The median prediction
-    lower: Array
-        The predicted lower bound of the prediction interval.
-    upper: Array
-        The predicted upper bound of the prediction interval.
-    alpha: Array
-        The 1 - alpha level for the prediction interval.
-    weight_median: float
-        The weight for the median prediction.
-    weight_alpha: Array
-        The weights for the PI.
-    axis: int
-        The axis corresponding to the ensemble. Default is the last axis.
-    backend: str
+    obs:
+        The observations as a scalar or array of shape `(...,)`.
+    median:
+        The predicted median of the distribution as a scalar or array of shape `(...,)`.
+    lower:
+        The predicted lower bound of the PI. If `alpha` is an array of shape `(K,)`,
+        `lower` must have shape `(...,K)`.
+    upper:
+        The predicted upper bound of the PI. If `alpha` is an array of shape `(K,)`,
+        `upper` must have shape `(...,K)`.
+    alpha:
+        The 1 - alpha level for the prediction intervals as an array of shape `(K,)`.
+    w_median:
+        The weight for the median prediction. Defaults to 0.5.
+    w_alpha:
+        The weights for the PI. Defaults to `alpha / 2`.
+    backend:
         The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
 
     Returns
     -------
-    score: Array
-        An array of interval scores for each observation, which should be averaged to get meaningful values.
+    score:
+        An array of weighted interval scores with the same shape as `obs`.
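+
+    Examples
+    --------
+    A minimal shape check; the numbers below are illustrative placeholders,
+    not reference values from Bracher et al. (2021):
+
+    >>> import numpy as np
+    >>> import scoringrules as sr
+    >>> sr.weighted_interval_score(
+    ...     np.zeros((10,)),
+    ...     np.zeros((10,)),
+    ...     np.ones((10, 5)) * -1.0,
+    ...     np.ones((10, 5)),
+    ...     np.linspace(0.1, 0.9, 5),
+    ... ).shape
+    (10,)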
""" - if weight_alpha is None: - weight_alpha = alpha / 2 - if weight_median is None: - weight_median = 0.5 + if w_median is None: + w_median = 0.5 + if w_alpha is None: + w_alpha = alpha / 2 B = backends.active if backend is None else backends[backend] - observations, median, lower, upper, alpha, weight_alpha, weight_median = map( + args = map( B.asarray, - (observations, median, lower, upper, alpha, weight_alpha, weight_median), + (obs, median, lower, upper, alpha, w_median, w_alpha), ) - if axis != -1: - lower = B.moveaxis(lower, axis, -1) - upper = B.moveaxis(upper, axis, -1) - if B.name == "numba": - return interval._weighted_interval_score_gufunc( - observations, median, lower, upper, alpha, weight_median, weight_alpha - ) + return interval._weighted_interval_score_gufunc(*args) - return interval._weighted_interval_score( - observations, - median, - lower, - upper, - alpha, - weight_median, - weight_alpha, - backend=backend, - ) + return interval.weighted_interval_score(*args, backend=backend) diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 350e215..10d6200 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -68,7 +68,10 @@ def quantile( @abc.abstractmethod def max( - self, x: "Array", axis: int | tuple[int, ...] | None, keepdims: bool = False + self, + x: "Array", + axis: int | tuple[int, ...] | None, + keepdims: bool = False, ) -> "Array": """Return the maximum value of an input array ``x``.""" @@ -111,7 +114,11 @@ def unique_values(self, x: "Array", /) -> "Array": @abc.abstractmethod def concat( - self, arrays: tuple["Array", ...] | list["Array"], /, *, axis: int | None = 0 + self, + arrays: tuple["Array", ...] | list["Array"], + /, + *, + axis: int | None = 0, ) -> "Array": """Join a sequence of arrays along an existing axis.""" diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index 8dcdd2b..80332cb 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -69,7 +69,10 @@ def quantile( return jnp.quantile(x, q, axis=axis, keepdims=keepdims) def max( - self, x: "Array", axis: int | tuple[int, ...] | None, keepdims: bool = False + self, + x: "Array", + axis: int | tuple[int, ...] | None, + keepdims: bool = False, ) -> "Array": return jnp.max(x, axis=axis, keepdims=keepdims) @@ -107,7 +110,11 @@ def unique_values(self, x: "Array") -> "Array": return jnp.unique(x) def concat( - self, arrays: tuple["Array", ...] | list["Array"], /, *, axis: int | None = 0 + self, + arrays: tuple["Array", ...] | list["Array"], + /, + *, + axis: int | None = 0, ) -> "Array": return jnp.concatenate(arrays, axis=axis) diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 79393cc..1fce50f 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -69,7 +69,10 @@ def quantile( return np.quantile(x, q, axis=axis, keepdims=keepdims) def max( - self, x: "NDArray", axis: int | tuple[int, ...] | None, keepdims: bool = False + self, + x: "NDArray", + axis: int | tuple[int, ...] | None, + keepdims: bool = False, ) -> "NDArray": return np.max(x, axis=axis, keepdims=keepdims) @@ -124,7 +127,11 @@ def squeeze( return np.squeeze(x, axis=axis) def stack( - self, arrays: tuple["NDArray", ...] | list["NDArray"], /, *, axis: int = 0 + self, + arrays: tuple["NDArray", ...] 
| list["NDArray"], + /, + *, + axis: int = 0, ) -> "NDArray": return np.stack(arrays, axis=axis) diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index daf6323..9143607 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -76,7 +76,10 @@ def quantile( raise NotImplementedError def max( - self, x: "Tensor", axis: int | tuple[int, ...] | None, keepdims: bool = False + self, + x: "Tensor", + axis: int | tuple[int, ...] | None, + keepdims: bool = False, ) -> "Tensor": return tf.math.reduce_max(x, axis=axis, keepdims=keepdims) diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 933271d..3c7528b 100644 --- a/scoringrules/backend/torch.py +++ b/scoringrules/backend/torch.py @@ -70,7 +70,10 @@ def quantile( return torch.quantile(x, q, dim=axis, keepdim=keepdims) def max( - self, x: "Tensor", axis: int | tuple[int, ...] | None, keepdims: bool = False + self, + x: "Tensor", + axis: int | tuple[int, ...] | None, + keepdims: bool = False, ) -> "Tensor": return torch.max(x, axis=axis, keepdim=keepdims)[0] @@ -105,7 +108,11 @@ def unique_values(self, x: "Tensor", /) -> "Tensor": return torch.unique(x) def concat( - self, arrays: tuple["Tensor", ...] | list["Tensor"], /, *, axis: int | None = 0 + self, + arrays: tuple["Tensor", ...] | list["Tensor"], + /, + *, + axis: int | None = 0, ) -> "Tensor": return torch.concat(arrays, axis=axis) diff --git a/scoringrules/core/crps/_closed.py b/scoringrules/core/crps/_closed.py index e541b63..1cddcde 100644 --- a/scoringrules/core/crps/_closed.py +++ b/scoringrules/core/crps/_closed.py @@ -111,7 +111,8 @@ def _inner(params): # option 1: in a loop s = B.stack( - [_inner(params) for params in zip(obs, n, prob, strict=True)], axis=-1 + [_inner(params) for params in zip(obs, n, prob, strict=True)], + axis=-1, ) # option 2: apply_along_axis (does not work with JAX) @@ -526,7 +527,10 @@ def laplace( def logistic( - obs: "ArrayLike", mu: "ArrayLike", sigma: "ArrayLike", backend: "Backend" = None + obs: "ArrayLike", + mu: "ArrayLike", + sigma: "ArrayLike", + backend: "Backend" = None, ) -> "Array": """Compute the CRPS for the normal distribution.""" B = backends.active if backend is None else backends[backend] @@ -648,7 +652,10 @@ def negbinom( def normal( - obs: "ArrayLike", mu: "ArrayLike", sigma: "ArrayLike", backend: "Backend" = None + obs: "ArrayLike", + mu: "ArrayLike", + sigma: "ArrayLike", + backend: "Backend" = None, ) -> "Array": """Compute the CRPS for the logistic distribution.""" B = backends.active if backend is None else backends[backend] diff --git a/scoringrules/core/interval/__init__.py b/scoringrules/core/interval/__init__.py index 3382b26..6f3ef6c 100644 --- a/scoringrules/core/interval/__init__.py +++ b/scoringrules/core/interval/__init__.py @@ -1,14 +1,12 @@ try: - from ._gufunc import _interval_score_gufunc, _weighted_interval_score_gufunc + from ._gufunc import _weighted_interval_score_gufunc except ImportError: - _interval_score_gufunc = None _weighted_interval_score_gufunc = None -from ._score import _interval_score, _weighted_interval_score +from ._score import interval_score, weighted_interval_score __all__ = [ - "_interval_score", - "_weighted_interval_score", - "_interval_score_gufunc", + "interval_score", + "weighted_interval_score", "_weighted_interval_score_gufunc", ] diff --git a/scoringrules/core/interval/_gufunc.py b/scoringrules/core/interval/_gufunc.py index 52e96fa..0c3b62d 100644 --- a/scoringrules/core/interval/_gufunc.py +++ 
+++ b/scoringrules/core/interval/_gufunc.py
@@ -1,25 +1,5 @@
 import numpy as np
-from numba import guvectorize, vectorize
-
-
-@vectorize(
-    [
-        "float64(float64, float64, float64, float64)",
-        "float32(float32, float32, float32, float32)",
-    ]
-)
-def _interval_score_gufunc(
-    obs: np.ndarray,
-    lower: np.ndarray,
-    upper: np.ndarray,
-    alpha: np.ndarray,
-):
-    """Interval score or Winkler score."""
-    return (
-        (upper - lower)
-        + (obs < lower) * (2 / alpha) * (lower - obs)
-        + (obs > upper) * (2 / alpha) * (obs - upper)
-    )
+from numba import guvectorize
 
 
 @guvectorize(
diff --git a/scoringrules/core/interval/_score.py b/scoringrules/core/interval/_score.py
index 899e1cb..92cfc4e 100644
--- a/scoringrules/core/interval/_score.py
+++ b/scoringrules/core/interval/_score.py
@@ -6,41 +6,36 @@
 from scoringrules.core.typing import Array, Backend
 
 
-def _interval_score(
+def interval_score(
     obs: "Array",
     lower: "Array",
     upper: "Array",
     alpha: "Array",
-    backend: "Backend" = None,
 ) -> "Array":
     """Winkler or Interval Score for prediction interval PI[lower, upper] and observations."""
-    # We don't need the backend here
     width = upper - lower
     above = obs > upper
     below = obs < lower
-    W = width + (
-        below * (2 / alpha) * (lower - obs) + above * (2 / alpha) * (obs - upper)
-    )
+    W = width + below * (2 / alpha) * (lower - obs)
+    W += above * (2 / alpha) * (obs - upper)
     return W
 
 
-def _weighted_interval_score(
+def weighted_interval_score(
     obs: "Array",
     median: "Array",
     lower: "Array",
     upper: "Array",
     alpha: "Array",
-    weight_median: "Array",
-    weight_alpha: "Array",
+    w_median: "Array",
+    w_alpha: "Array",
     backend: "Backend" = None,
 ) -> "Array":
     """Weighted Interval Score for prediction interval PI[lower, upper]."""
     B = backends.active if backend is None else backends[backend]
-    K = weight_alpha.shape[0]
-    IS = _interval_score(obs, lower, upper, alpha)
-    WIS = (
-        B.sum(IS * B.expand_dims(weight_alpha, axis=-2), axis=-1)
-        + weight_median * median
-    )
+    K = w_alpha.shape[0]
+    # broadcast obs against the K prediction intervals stacked in the last axis
+    IS = interval_score(obs[..., None], lower, upper, alpha)
+    WIS = B.sum(IS * w_alpha, axis=-1)
+    # median term of the WIS definition: w_0 * |y - m|
+    WIS += w_median * B.abs(obs - median)
     WIS /= K + 1 / 2
-    return WIS
+    return B.squeeze(WIS)
diff --git a/scoringrules/core/kernels/_approx.py b/scoringrules/core/kernels/_approx.py
index 8e86fa8..528d096 100644
--- a/scoringrules/core/kernels/_approx.py
+++ b/scoringrules/core/kernels/_approx.py
@@ -33,7 +33,8 @@ def ensemble_uv(
     M: int = fct.shape[-1]
     e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend), axis=-1) / M
     e_2 = B.sum(
-        gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend), axis=(-1, -2)
+        gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend),
+        axis=(-1, -2),
     ) / (M**2)
     e_3 = gauss_kern_uv(obs, obs)
 
@@ -161,7 +162,9 @@ def vr_ensemble_uv(
     )
     e_2 = B.sum(
         gauss_kern_uv(
-            B.expand_dims(fct, axis=-1), B.expand_dims(fct, axis=-2), backend=backend
+            B.expand_dims(fct, axis=-1),
+            B.expand_dims(fct, axis=-2),
+            backend=backend,
         )
         * (B.expand_dims(fw, axis=-1) * B.expand_dims(fw, axis=-2)),
         axis=(-1, -2),
diff --git a/scoringrules/core/logarithmic.py b/scoringrules/core/logarithmic.py
index d95f0a9..cd4d7dc 100644
--- a/scoringrules/core/logarithmic.py
+++ b/scoringrules/core/logarithmic.py
@@ -149,7 +149,10 @@ def twopexponential(
 
 
 def gamma(
-    obs: "ArrayLike", shape: "ArrayLike", rate: "ArrayLike", backend: "Backend" = None
+    obs: "ArrayLike",
+    shape: "ArrayLike",
+    rate: "ArrayLike",
+    backend: "Backend" = None,
 ) -> "Array":
     """Compute the logarithmic score for the gamma distribution."""
     B = backends.active if backend is None else backends[backend]
diff --git a/scoringrules/core/stats.py b/scoringrules/core/stats.py
index 6220ddd..80523a4 100644
--- a/scoringrules/core/stats.py
+++ b/scoringrules/core/stats.py
@@ -55,7 +58,10 @@ def _exp_cdf(x: "ArrayLike", rate: "ArrayLike", backend: "Backend" = None) -> "A
 
 
 def _gamma_pdf(
-    x: "ArrayLike", shape: "ArrayLike", rate: "ArrayLike", backend: "Backend" = None
+    x: "ArrayLike",
+    shape: "ArrayLike",
+    rate: "ArrayLike",
+    backend: "Backend" = None,
 ) -> "Array":
     """Probability density function for the gamma distribution."""
     B = backends.active if backend is None else backends[backend]
@@ -64,7 +67,10 @@ def _gamma_cdf(
-    x: "ArrayLike", shape: "ArrayLike", rate: "ArrayLike", backend: "Backend" = None
+    x: "ArrayLike",
+    shape: "ArrayLike",
+    rate: "ArrayLike",
+    backend: "Backend" = None,
 ) -> "Array":
     """Cumulative distribution function for the gamma distribution."""
     B = backends.active if backend is None else backends[backend]
diff --git a/scoringrules/visualization/reliability.py b/scoringrules/visualization/reliability.py
index b8d0c2b..5dcf2f0 100644
--- a/scoringrules/visualization/reliability.py
+++ b/scoringrules/visualization/reliability.py
@@ -128,7 +128,11 @@ def _uncertainty_band(x, cep, n_bootstrap=100, bandtype="consistency", alpha=0.0
         _x, _y, _cep = corp_reliability(_y, _x)
         res.append(
             interp1d(
-                _x, _cep, fill_value="nan", bounds_error=False, assume_sorted=True
+                _x,
+                _cep,
+                fill_value="nan",
+                bounds_error=False,
+                assume_sorted=True,
             )(x)
         )
     res = np.array(res)
diff --git a/tests/test_interval.py b/tests/test_interval.py
index 79df546..c59a314 100644
--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@@ -7,17 +7,65 @@
 N = 100
 
-## We use Bracher et al (2021) Eq. (3) to test the WIS
-
 
 @pytest.mark.parametrize("backend", BACKENDS)
 def test_interval_score(backend):
+    # basic functionality
+    _ = sr.interval_score(0.1, 0.0, 0.4, 0.5)
+
+    # shapes
+    res = sr.interval_score(
+        obs=np.array([0.1, 0.2, 0.3]),
+        lower=np.array([0.0, 0.1, 0.2]),
+        upper=np.array([0.4, 0.3, 0.5]),
+        alpha=0.5,
+        backend=backend,
+    )
+    assert res.shape == (3,)
+
+    res = sr.interval_score(
+        obs=np.random.uniform(size=(10,)),
+        lower=np.ones((10, 5)) * 0.2,
+        upper=np.ones((10, 5)) * 0.8,
+        alpha=np.linspace(0.1, 0.9, 5),
+        backend=backend,
+    )
+    assert res.shape == (10, 5)
+
+    # raise ValueError
+    with pytest.raises(ValueError):
+        _ = sr.interval_score(
+            obs=np.random.uniform(size=(10,)),
+            lower=np.ones((10, 5)) * 0.2,
+            upper=np.ones((10, 4)) * 0.8,
+            alpha=np.linspace(0.1, 0.9, 5),
+            backend=backend,
+        )
+
+    with pytest.raises(ValueError):
+        _ = sr.interval_score(
+            obs=np.random.uniform(size=(10,)),
+            lower=np.ones((10, 5)) * 0.2,
+            upper=np.ones((10, 5)) * 0.8,
+            alpha=np.linspace(0.1, 0.9, 4),
+            backend=backend,
+        )
+
+    # correctness
+    res = sr.interval_score(0.1, 0.0, 0.4, 0.5, backend=backend)
+    assert np.isclose(res, 0.4)
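+
+    # additional sanity check (not tied to a reference value): an observation
+    # inside every PI scores the interval width 0.8 - 0.2 = 0.6 at all alpha levels
+    res = sr.interval_score(
+        obs=np.full((10,), 0.5),
+        lower=np.ones((10, 5)) * 0.2,
+        upper=np.ones((10, 5)) * 0.8,
+        alpha=np.linspace(0.1, 0.9, 5),
+        backend=backend,
+    )
+    assert np.allclose(np.asarray(res), 0.6)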
+
+
+## We use Bracher et al (2021) Eq. (3) to test the WIS
+@pytest.mark.parametrize("backend", BACKENDS)
+def test_weighted_interval_score(backend):
     obs = np.zeros(N)
     alpha = np.linspace(0.01, 0.99, 99)
     upper = st.norm(0, 1).ppf(np.tile(1 - alpha / 2, (N, 1)))
     lower = st.norm(0, 1).ppf(np.tile(alpha / 2, (N, 1)))
-    WIS = sr.weighted_interval_score(obs, obs, lower, upper, alpha)
-    CRPS = sr.crps_normal(obs, 0, 1)
+    WIS = sr.weighted_interval_score(obs, obs, lower, upper, alpha, backend=backend)
+    CRPS = sr.crps_normal(obs, 0, 1, backend=backend)
+    WIS, CRPS = map(np.asarray, (WIS, CRPS))
 
     assert np.all(1 - WIS / CRPS <= 0.001 * CRPS)
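+
+    # additional check: passing the canonical weights explicitly
+    # (w_median = 0.5, w_alpha = alpha / 2) should reproduce the default result
+    WIS_explicit = sr.weighted_interval_score(
+        obs, obs, lower, upper, alpha, 0.5, alpha / 2, backend=backend
+    )
+    assert np.allclose(np.asarray(WIS_explicit), WIS)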