Skip to content

Commit

Permalink
fix compare and make stacking default method (#1438)
Browse files Browse the repository at this point in the history
* fix compare and make stacking default method

* update changelog
  • Loading branch information
aloctavodia committed Nov 9, 2020
1 parent 9c21881 commit 44ceb08
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* Add observed argument to (un)plot observed data in `plot_ppc` ([1422](https://github.com/arviz-devs/arviz/pull/1422))
* Add support for named dims and coordinates with multivariate observations ([1429](https://github.com/arviz-devs/arviz/pull/1429))
* Add skipna argument to `plot_posterior` ([1432](https://github.com/arviz-devs/arviz/pull/1432))
* Make stacking the default method to compute weights in `compare` ([1438](https://github.com/arviz-devs/arviz/pull/1438))


### Maintenance and fixes
Expand All @@ -20,8 +21,9 @@
* Have `from_pystan` store attrs as strings to allow netCDF storage ([1417](https://github.com/arviz-devs/arviz/pull/1417))
* Remove ticks and spines in `plot_violin` ([1426](https://github.com/arviz-devs/arviz/pull/1426))
* Use circular KDE function and fix tick labels in circular `plot_trace` ([1428](https://github.com/arviz-devs/arviz/pull/1428))
* Fix `pair_plot` for mixed discrete and continuous variables ([1434](https://github.com/arviz-devs/arviz/pull/1434))
* Fix in-sample deviance in `plot_compare` ([1435](https://github.com/arviz-devs/arviz/pull/1435))
* Fix computation of weights in compare ([1438](https://github.com/arviz-devs/arviz/pull/1438))

### Deprecation

Expand Down
28 changes: 15 additions & 13 deletions arviz/stats/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@


def compare(
dataset_dict, ic=None, method="BB-pseudo-BMA", b_samples=1000, alpha=1, seed=None, scale=None
dataset_dict, ic=None, method="stacking", b_samples=1000, alpha=1, seed=None, scale=None
):
r"""Compare models based on PSIS-LOO `loo` or WAIC `waic` cross-validation.
Expand All @@ -62,8 +62,8 @@ def compare(
method: str
Method used to estimate the weights for each model. Available options are:
- 'stacking' : stacking of predictive distributions.
- 'BB-pseudo-BMA' : (default) pseudo-Bayesian Model averaging using Akaike-type
- 'stacking' : (default) stacking of predictive distributions.
- 'BB-pseudo-BMA' : pseudo-Bayesian Model averaging using Akaike-type
weighting. The weights are stabilized using the Bayesian bootstrap.
- 'pseudo-BMA': pseudo-Bayesian Model averaging using Akaike-type
weighting, without Bootstrap stabilization (not recommended).
Expand Down Expand Up @@ -141,6 +141,10 @@ def compare(
waic : Compute the widely applicable information criterion.
"""
warnings.warn(
"The default method used to estimate the weights for each model,"
"has changed from BB-pseudo-BMA to stacking"
)
names = list(dataset_dict.keys())
scale = rcParams["stats.ic_scale"] if scale is None else scale.lower()
if scale == "log":
Expand Down Expand Up @@ -210,7 +214,7 @@ def compare(
if method.lower() == "stacking":
rows, cols, ic_i_val = _ic_matrix(ics, ic_i)
exp_ic_i = np.exp(ic_i_val / scale_value)
last_col = cols - 1
km1 = cols - 1

def w_fuller(weights):
return np.concatenate((weights, [max(1.0 - np.sum(weights), 0.0)]))
Expand All @@ -224,18 +228,16 @@ def log_score(weights):

def gradient(weights):
w_full = w_fuller(weights)
grad = np.zeros(last_col)
for k in range(last_col - 1):
grad = np.zeros(km1)
for k in range(km1):
for i in range(rows):
grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, last_col]) / np.dot(
exp_ic_i[i], w_full
)
grad[k] += (exp_ic_i[i, k] - exp_ic_i[i, km1]) / np.dot(exp_ic_i[i], w_full)
return -grad

theta = np.full(last_col, 1.0 / cols)
bounds = [(0.0, 1.0) for _ in range(last_col)]
theta = np.full(km1, 1.0 / cols)
bounds = [(0.0, 1.0) for _ in range(km1)]
constraints = [
{"type": "ineq", "fun": lambda x: 1.0 - np.sum(x)},
{"type": "ineq", "fun": lambda x: -np.sum(x) + 1.0},
{"type": "ineq", "fun": np.sum},
]

Expand All @@ -255,7 +257,7 @@ def gradient(weights):
z_bs = np.zeros_like(weights)
for i in range(b_samples):
z_b = np.dot(b_weighting[i], ic_i_val)
u_weights = np.exp((z_b - np.min(z_b)) / scale_value)
u_weights = np.exp((z_b - np.max(z_b)) / scale_value)
z_bs[i] = z_b # pylint: disable=unsupported-assignment-operation
weights[i] = u_weights / np.sum(u_weights)

Expand Down
4 changes: 2 additions & 2 deletions arviz/tests/base_tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def test_compare_unknown_ic_and_method(centered_eight, non_centered_eight):
def test_compare_different(centered_eight, non_centered_eight, ic, method, scale):
model_dict = {"centered": centered_eight, "non_centered": non_centered_eight}
weight = compare(model_dict, ic=ic, method=method, scale=scale)["weight"]
assert weight["non_centered"] >= weight["centered"]
assert weight["non_centered"] > weight["centered"]
assert_allclose(np.sum(weight), 1.0)


Expand All @@ -174,7 +174,7 @@ def test_compare_different_multidim(multidim_models, ic, method):
weight = compare(model_dict, ic=ic, method=method)["weight"]

# this should hold because the same seed is always used
assert weight["model_1"] >= weight["model_2"]
assert weight["model_1"] > weight["model_2"]
assert_allclose(np.sum(weight), 1.0)


Expand Down

0 comments on commit 44ceb08

Please sign in to comment.