add test model to build posterior prob from samples #3616

Merged: 8 commits, Feb 12, 2021
69 changes: 69 additions & 0 deletions bin/inference/pycbc_validate_test_posterior
@@ -0,0 +1,69 @@
#!/usr/bin/env python
""" Validate and generate diagnostic plots for a inference file using the
test posterior model.
"""
import sys
import numpy
import argparse
from matplotlib import use; use('Agg')
import pylab
from pycbc.inference.option_utils import prior_from_config
from pycbc.inference import models, io
from scipy.stats import gaussian_kde, ks_2samp
from pycbc.io import FieldArray
numpy.random.seed(0)

parser = argparse.ArgumentParser()
parser.add_argument('--input-file', help='inference posterior file')
parser.add_argument('--output-file', help='diagnostic plot')
parser.add_argument('--p-value-threshold', help='minimum ks test p-value',
type=float)
parser.add_argument('--ind-samples', help='use only this number of samples',
default=1000, type=int)
args = parser.parse_args()

size = int(1e6)
d1 = io.loadfile(args.input_file, 'r')

# We directly recreate the model and prior from the stored
# config to ensure the same configuration is used
config = d1.read_config_file()

prior = prior_from_config(config)
model = models.read_from_config(config)

# Draw reference samples directly from the kde
draw = model.kde.resample(size=size)
data = {v: draw[i, :] for i, v in enumerate(model.variable_params)}
ref = FieldArray.from_kwargs(**data)

# Apply the prior bounds so that KDE leakage outside them does not skew the comparison
for dist in prior.distributions:
param = dist._params[0]
bound = dist._bounds[param]
ref = ref[(bound.min < ref[param]) & (ref[param] < bound.max)]

nparam = len(model.variable_params)
fig, axs = pylab.subplots(1, nparam, figsize=[6*nparam, 4], dpi=100)

result = d1.read_samples(model.variable_params)
failed = False
for param, ax in zip(model.variable_params, axs):
rpart = numpy.random.choice(result[param], replace=False,
size=args.ind_samples)
kv, pvalue = ks_2samp(ref[param], rpart)
print("{}, p-value={:.3f}".format(param, pvalue))

pylab.sca(ax)
pylab.hist(ref[param], density=True, bins=30, label='reference')
pylab.hist(result[param], density=True, bins=30, alpha=0.5, label='sampler')
pylab.title('KS p-value = {:.4f}'.format(pvalue))
pylab.xlabel(param)
pylab.legend()
ax.get_yaxis().set_visible(False)

if pvalue < args.p_value_threshold:
failed = True

pylab.savefig(args.output_file)
sys.exit(failed)
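
The pass/fail decision above rests on the two-sample Kolmogorov-Smirnov test: a small p-value for any parameter means the sampler's marginal distribution disagrees with draws from the reference KDE. The toy snippet below is not part of the PR; it only illustrates how scipy.stats.ks_2samp separates matching from mismatched distributions (the sample sizes and the shift are arbitrary choices).

import numpy
from scipy.stats import ks_2samp

# Draws from the same distribution give a large p-value; a shifted
# distribution is flagged by a tiny one.
rng = numpy.random.default_rng(0)
matched = ks_2samp(rng.normal(size=1000), rng.normal(size=1000))
shifted = ks_2samp(rng.normal(size=1000), rng.normal(1.0, 1.0, size=1000))
print(matched.pvalue)   # typically far above any reasonable threshold
print(shifted.pvalue)   # effectively zero
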
Binary file modified examples/inference/analytic-normal2d/posterior-normal2d.png
3 changes: 2 additions & 1 deletion pycbc/inference/models/__init__.py
@@ -21,7 +21,7 @@


from .analytic import (TestEggbox, TestNormal, TestRosenbrock, TestVolcano,
TestPrior)
TestPrior, TestPosterior)
from .gaussian_noise import GaussianNoise
from .marginalized_gaussian_noise import MarginalizedPhaseGaussianNoise
from .marginalized_gaussian_noise import MarginalizedPolarization
@@ -182,6 +182,7 @@ def read_from_config(cp, **kwargs):
TestNormal,
TestRosenbrock,
TestVolcano,
TestPosterior,
TestPrior,
GaussianNoise,
MarginalizedPhaseGaussianNoise,
47 changes: 47 additions & 0 deletions pycbc/inference/models/analytic.py
@@ -18,7 +18,9 @@
log likelihood.
"""

import logging
import numpy
import numpy.random
from scipy import stats

from .base import BaseModel
@@ -212,3 +214,48 @@ def _loglikelihood(self):
"""Returns zero.
"""
return 0.


class TestPosterior(BaseModel):
r"""Build a test posterior from a set of samples using a kde

Parameters
----------
variable_params : (tuple of) string(s)
A tuple of parameter names that will be varied.
posterior_file : hdf file
A compatible pycbc inference output file from which posterior samples can
be read.
nsamples : int
Number of samples to draw from the posterior file to build the KDE.
**kwargs :
All other keyword arguments are passed to ``BaseModel``.

"""
name = "test_posterior"

def __init__(self, variable_params, posterior_file, nsamples, **kwargs):
super(TestPosterior, self).__init__(variable_params, **kwargs)

from pycbc.inference.io import loadfile # avoid cyclic import
logging.info('loading test posterior model')
inf_file = loadfile(posterior_file)
logging.info('reading samples')
samples = inf_file.read_samples(variable_params)
samples = numpy.array([samples[v] for v in variable_params])

# keep only the requested number of samples
idx = numpy.arange(0, samples.shape[-1])
idx = numpy.random.choice(idx, size=int(nsamples), replace=False)
samples = samples[:, idx]

logging.info('making kde with %s samples', samples.shape[-1])
self.kde = stats.gaussian_kde(samples)
logging.info('done initializing test posterior model')

def _loglikelihood(self):
"""Returns the log pdf of the Rosenbrock function.
"""
p = numpy.array([self.current_params[p] for p in self.variable_params])
logpost = self.kde.logpdf(p)
return float(logpost[0])
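
At its core the new model is just a Gaussian KDE evaluated in log space, the same gaussian_kde/logpdf pattern used by _loglikelihood above. The standalone sketch below is illustrative only (the toy samples are invented); it shows the (nparams, nsamples) layout that gaussian_kde expects and how a single parameter point is evaluated.

import numpy
from scipy import stats

# Build a KDE from toy 2-d samples; gaussian_kde wants shape (nparams, nsamples).
samples = numpy.random.multivariate_normal([0., 0.], numpy.eye(2), size=5000).T
kde = stats.gaussian_kde(samples)
# Evaluate the log density at one point in parameter space, as the model does
# for each set of current_params.
point = numpy.array([0.1, -0.2])
print(float(kde.logpdf(point)[0]))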