-
Notifications
You must be signed in to change notification settings - Fork 47
/
evidence.py
466 lines (406 loc) · 17.1 KB
/
evidence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
"""Various methods for estimating the log evidence of a model."""
import logging
from typing import Optional, Union
import numpy as np
from scipy import stats
from scipy.integrate import simpson, trapezoid
from scipy.optimize import minimize_scalar
from scipy.special import logsumexp
from ..C import SIMPSON, STEPPINGSTONE, TRAPEZOID
from ..objective import (
AggregatedObjective,
NegLogParameterPriors,
NegLogPriors,
)
from ..problem import Problem
from ..result import Result
from .diagnostics import geweke_test
logger = logging.getLogger(__name__)
def laplace_approximation_log_evidence(
problem: Problem, x: np.ndarray
) -> float:
"""
Compute the log evidence using the Laplace approximation.
The objective in your `problem` must be a negative log posterior, and support Hessian computation.
Parameters
----------
problem:
The problem to compute the log evidence for.
x:
The maximum a posteriori estimate at which to compute the log evidence.
Returns
-------
log_evidence: float
"""
hessian = problem.objective(
problem.get_reduced_vector(x), sensi_orders=(2,)
)
_, log_det = np.linalg.slogdet(hessian)
log_prop_posterior = -problem.objective(problem.get_reduced_vector(x))
log_evidence = (
0.5 * np.log(2 * np.pi) * len(problem.x_free_indices)
- 0.5 * log_det
+ log_prop_posterior
)
return log_evidence
def harmonic_mean_log_evidence(
result: Result,
prior_samples: Optional[np.ndarray] = None,
neg_log_likelihood_fun: Optional[callable] = None,
) -> float:
"""
Compute the log evidence using the harmonic mean estimator.
Stabilized harmonic mean estimator is used if prior samples are provided.
Newton and Raftery (1994): https://doi.org/10.1111/j.2517-6161.1994.tb01956.x
Parameters
----------
result:
prior_samples:
Samples from the prior distribution. If samples from the prior are provided,
the stabilized harmonic mean is computed (recommended). Then, the likelihood function must be provided as well.
neg_log_likelihood_fun: callable
Function to evaluate the negative log likelihood. Necessary if prior_samples is not `None`.
Returns
-------
log_evidence
"""
if result.sample_result is None:
raise ValueError("No samples available. Run sampling first.")
# compute negative log likelihood from traces
burn_in = geweke_test(result)
trace_neglogpost = result.sample_result.trace_neglogpost[0, burn_in:]
trace_neglogprior = result.sample_result.trace_neglogprior[0, burn_in:]
neg_log_likelihoods_posterior = trace_neglogpost - trace_neglogprior
if prior_samples is None:
# compute harmonic mean from samples
return -logsumexp(neg_log_likelihoods_posterior) + np.log(
neg_log_likelihoods_posterior.size
)
# compute stabilized harmonic mean
if prior_samples is not None and neg_log_likelihood_fun is None:
raise ValueError(
"you need to provide a likelihood function to evaluate prior samples"
)
# compute delta (ratio of prior to posterior samples)
n_samples_prior = len(prior_samples)
n_samples_posterior = len(trace_neglogpost)
delta = n_samples_prior / (n_samples_prior + n_samples_posterior)
neg_log_likelihoods_prior = np.array(
[neg_log_likelihood_fun(x) for x in prior_samples]
)
log_likelihoods_stack = -np.concatenate(
[neg_log_likelihoods_prior, neg_log_likelihoods_posterior]
)
def _log_evidence_objective(log_p: float):
# Helper function to compute the log evidence with stabilized harmonic mean
log_w_i = logsumexp(
np.stack(
(
log_p * np.ones_like(log_likelihoods_stack),
log_likelihoods_stack,
),
axis=1,
),
b=np.array([delta, 1 - delta]),
axis=1,
)
res, sign = logsumexp(
[
log_p,
logsumexp(log_likelihoods_stack - log_w_i)
- logsumexp(-log_w_i),
],
b=[1, -1],
return_sign=True,
)
return sign * res
sol = minimize_scalar(_log_evidence_objective)
return sol.x
def parallel_tempering_log_evidence(
result: Result,
method: str = "trapezoid",
use_all_chains: bool = True,
) -> Union[float, None]:
"""Perform thermodynamic integration or steppingstone sampling to estimate the log evidence.
Thermodynamic integration is performed by integrating the mean log likelihood over the temperatures.
Errors might come from the samples itself or the numerical integration.
Steppingstone sampling is a form of importance sampling that uses the maximum likelihood of each temperature.
It does not require an integration, but can be biased for a small number of temperatures.
See (Annis et al., 2019), https://doi.org/10.1016/j.jmp.2019.01.005, for more details.
This should be used with a beta decay temperature schedule and not with the adaptive version of
parallel tempering sampling as the temperature schedule is not optimal for thermodynamic integration.
Parameters
----------
result:
Result object containing the samples.
method:
Integration method, either 'trapezoid' or 'simpson' to perform thermodynamic integration
(uses scipy for integration) or 'steppingstone' to perform steppingstone sampling.
use_all_chains:
If True, calculate burn-in for each chain and use the maximal burn-in for all chains for the integration.
This will fail if not all chains have converged yet.
Otherwise, use only the converged chains for the integration (might increase the integration error).
"""
# compute burn in for all chains but the last one (prior only)
burn_ins = np.zeros(len(result.sample_result.betas), dtype=int)
for i_chain in range(len(result.sample_result.betas)):
burn_ins[i_chain] = geweke_test(result, chain_number=i_chain)
max_burn_in = int(np.max(burn_ins))
if max_burn_in >= result.sample_result.trace_x.shape[1]:
logger.warning(
f"At least {np.sum(burn_ins >= result.sample_result.trace_x.shape[1])} chains seem to not have "
f"converged yet. You may want to use a larger number of samples."
)
if use_all_chains:
raise ValueError(
"Not all chains have converged yet. You may want to use a larger number of samples, "
"or try ´use_all_chains=False´, which might increase the integration error."
)
if use_all_chains:
# estimate mean of log likelihood for each beta
trace_loglike = (
result.sample_result.trace_neglogprior[::-1, max_burn_in:]
- result.sample_result.trace_neglogpost[::-1, max_burn_in:]
)
mean_loglike_per_beta = np.mean(trace_loglike, axis=1)
temps = result.sample_result.betas[::-1]
else:
# estimate mean of log likelihood for each beta if chain has converged
mean_loglike_per_beta = []
trace_loglike = []
temps = []
for i_chain in reversed(range(len(result.sample_result.betas))):
if burn_ins[i_chain] < result.sample_result.trace_x.shape[1]:
# save temperature-chain as it is converged
temps.append(result.sample_result.betas[i_chain])
# calculate mean log likelihood for each beta
trace_loglike_i = (
result.sample_result.trace_neglogprior[
i_chain, burn_ins[i_chain] :
]
- result.sample_result.trace_neglogpost[
i_chain, burn_ins[i_chain] :
]
)
trace_loglike.append(trace_loglike_i)
mean_loglike_per_beta.append(np.mean(trace_loglike_i))
if method == TRAPEZOID:
log_evidence = trapezoid(
# integrate from low to high temperature
y=mean_loglike_per_beta,
x=temps,
)
elif method == SIMPSON:
log_evidence = simpson(
# integrate from low to high temperature
y=mean_loglike_per_beta,
x=temps,
)
elif method == STEPPINGSTONE:
log_evidence = steppingstone(temps=temps, trace_loglike=trace_loglike)
else:
raise ValueError(
f"Unknown method {method}. Choose 'trapezoid', 'simpson' for thermodynamic integration or ",
"'steppingstone' for steppingstone sampling.",
)
return log_evidence
def steppingstone(temps: np.ndarray, trace_loglike: np.ndarray) -> float:
"""Perform steppingstone sampling to estimate the log evidence.
Implementation based on Annis et al. (2019): https://doi.org/10.1016/j.jmp.2019.01.005.
Parameters
----------
temps:
Temperature values.
trace_loglike:
Log likelihood values for each temperature.
"""
from scipy.special import logsumexp
ss_log_evidences = np.zeros(len(temps) - 1)
for t_i in range(1, len(temps)):
# we use the maximum likelihood times the temperature difference to stabilize the logsumexp
# original formulation uses only the maximum likelihood, this is equivalent
ss_log_evidences[t_i - 1] = logsumexp(
trace_loglike[t_i - 1] * (temps[t_i] - temps[t_i - 1])
) - np.log(trace_loglike[t_i - 1].size)
log_evidence = np.sum(ss_log_evidences)
return log_evidence
def bridge_sampling_log_evidence(
result: Result,
n_posterior_samples_init: Optional[int] = None,
initial_guess_log_evidence: Optional[float] = None,
max_iter: int = 1000,
tol: float = 1e-6,
) -> float:
"""
Compute the log evidence using bridge sampling.
Based on "A Tutorial on Bridge Sampling" by Gronau et al. (2017): https://doi.org/10.1016/j.jmp.2017.09.005.
Using the optimal bridge function by Meng and Wong (1996) which minimises the relative mean-squared error.
Proposal function is calibrated using posterior samples, which are not used for the final bridge estimate
(as this may result in an underestimation of the marginal likelihood, see Overstall and Forster (2010)).
Parameters
----------
result:
The pyPESTO result object with filled sample result.
n_posterior_samples_init:
Number of samples used to calibrate the proposal function. By default, half of the posterior samples are used.
initial_guess_log_evidence:
Initial guess for the log evidence. By default, the Laplace approximation is used to compute the initial guess.
max_iter:
Maximum number of iterations. Default is 1000.
tol:
Tolerance for convergence. Default is 1e-6.
Returns
-------
log_evidence
"""
if result.sample_result is None:
raise ValueError("No samples available. Run sampling first.")
if not isinstance(result.problem.objective, AggregatedObjective):
raise ValueError("Objective must be an AggregatedObjective.")
# use Laplace approximation to get initial guess for p(y)
if initial_guess_log_evidence is None:
initial_guess_log_evidence = laplace_approximation_log_evidence(
problem=result.problem, x=result.optimize_result.x[0]
)
# extract posterior samples
burn_in = geweke_test(result)
posterior_samples = result.sample_result.trace_x[0, burn_in:]
# build proposal function from posterior samples
if n_posterior_samples_init is None:
n_posterior_samples_init = int(posterior_samples.shape[0] * 0.5)
# randomly select samples for calibration
calibration_index = np.random.choice(
np.arange(posterior_samples.shape[0]),
n_posterior_samples_init,
replace=False,
)
samples_calibration = posterior_samples[calibration_index]
# remove calibration samples from posterior samples
posterior_samples = posterior_samples[
[
j
for j in range(posterior_samples.shape[0])
if j not in calibration_index
]
]
# generate proposal samples and define proposal function
n_proposal_samples = posterior_samples.shape[0]
posterior_mean = np.mean(samples_calibration, axis=0)
posterior_cov = np.cov(samples_calibration.T)
# if covariance matrix is not positive definite (numerically), use diagonal covariance matrix only
try:
# proposal density function
log_proposal_fun = stats.multivariate_normal(
mean=posterior_mean, cov=posterior_cov
).logpdf
except np.linalg.LinAlgError:
posterior_cov = np.diag(np.diag(posterior_cov))
log_proposal_fun = stats.multivariate_normal(
mean=posterior_mean, cov=posterior_cov
).logpdf
# generate proposal samples
if posterior_cov.size == 1:
# univariate case
proposal_samples = np.random.normal(
loc=posterior_mean,
scale=np.sqrt(posterior_cov),
size=n_proposal_samples,
)
proposal_samples = proposal_samples.reshape(-1, 1)
else:
# multivariate case
proposal_samples = np.random.multivariate_normal(
mean=posterior_mean, cov=posterior_cov, size=n_proposal_samples
)
# Compute the weights for the bridge sampling estimate
log_s1 = np.log(
posterior_samples.shape[0]
/ (posterior_samples.shape[0] + n_proposal_samples)
)
log_s2 = np.log(
n_proposal_samples / (posterior_samples.shape[0] + n_proposal_samples)
)
# Start with the initial guess for p(y)
log_p_y = initial_guess_log_evidence
# Compute the log-likelihood, log-prior, and log-proposal for the posterior and proposal samples
# assumes that the objective function is the negative log-likelihood + negative log-prior
# get index of prior in the objective function
likelihood_fun_indices = []
for i, obj in enumerate(result.problem.objective._objectives):
if not isinstance(obj, NegLogParameterPriors) and not isinstance(
obj, NegLogPriors
):
likelihood_fun_indices.append(i)
def log_likelihood_fun(x_array):
return np.array(
[
np.sum(
[
-obj(
result.problem.get_full_vector(
x=x, x_fixed_vals=result.problem.x_fixed_vals
)
)
for obj_i, obj in enumerate(
result.problem.objective._objectives
)
if obj_i in likelihood_fun_indices
]
)
for x in x_array
]
)
def log_prior_fun(x_array):
return np.array(
[
np.sum(
[
-obj(
result.problem.get_full_vector(
x=x, x_fixed_vals=result.problem.x_fixed_vals
)
)
for obj_i, obj in enumerate(
result.problem.objective._objectives
)
if obj_i not in likelihood_fun_indices
]
)
for x in x_array
]
)
log_likelihood_posterior = log_likelihood_fun(posterior_samples)
log_prior_posterior = log_prior_fun(posterior_samples)
log_proposal_posterior = log_proposal_fun(posterior_samples)
log_likelihood_proposal = log_likelihood_fun(proposal_samples)
log_prior_proposal = log_prior_fun(proposal_samples)
log_proposal_proposal = log_proposal_fun(proposal_samples)
log_h_posterior_1 = log_s1 + log_likelihood_posterior + log_prior_posterior
log_h_proposal_1 = log_s1 + log_likelihood_proposal + log_prior_proposal
for i in range(max_iter):
# Compute h(θ) for posterior samples
log_h_posterior_2 = log_s2 + log_p_y + log_proposal_posterior
log_h_posterior = logsumexp([log_h_posterior_1, log_h_posterior_2])
# Compute h(θ) for proposal samples
log_h_proposal_2 = log_s2 + log_p_y + log_proposal_proposal
log_h_proposal = logsumexp([log_h_proposal_1, log_h_proposal_2])
# Calculate the numerator and denominator for the bridge sampling estimate
temp = log_likelihood_proposal + log_prior_proposal + log_h_proposal
log_numerator = logsumexp(temp) - np.log(
temp.size
) # compute mean in log space
temp = log_proposal_posterior + log_h_posterior
log_denominator = logsumexp(temp) - np.log(
temp.size
) # compute mean in log space
# Update p(y)
log_p_y_new = log_numerator - log_denominator
# Check for convergence
if abs(log_p_y_new - log_p_y) < tol:
break
log_p_y = log_p_y_new
if i == max_iter - 1:
logger.warning(
"Bridge sampling did not converge in the given number of iterations."
)
return log_p_y