Skip to content

Commit

Permalink
add test model to build posterior prob from samples (gwastro#3616)
Browse files Browse the repository at this point in the history
* initial commit

* fixes + logging

* fixes

* cc

* add validation script for test posterior

* bounds have to be done manually

* plot mod

* Update analytic.py
  • Loading branch information
ahnitz committed Feb 15, 2021
1 parent 1be55fe commit 8f7e949
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 1 deletion.
69 changes: 69 additions & 0 deletions bin/inference/pycbc_validate_test_posterior
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python
""" Validate and generate diagnostic plots for an inference file using the
test posterior model.

The model and prior are rebuilt from the configuration stored in the input
file, a large reference sample is drawn from the model's KDE, and each
parameter's sampler output is compared against it with a two-sample KS test.
One histogram panel per parameter is written to the output file. The exit
status is non-zero if any parameter's p-value falls below
``--p-value-threshold`` (when a threshold is given).
"""
import sys
import numpy
import argparse
from matplotlib import use; use('Agg')
import pylab
from pycbc.inference.option_utils import prior_from_config
from pycbc.inference import models, io
from scipy.stats import gaussian_kde, ks_2samp
from pycbc.io import FieldArray
numpy.random.seed(0)

parser = argparse.ArgumentParser()
parser.add_argument('--input-file', help='inference posterior file')
parser.add_argument('--output-file', help='diagnostic plot')
parser.add_argument('--p-value-threshold', help='minimum ks test p-value',
                    type=float)
parser.add_argument('--ind-samples', help='use only this number of samples',
                    default=1000, type=int)
args = parser.parse_args()

# number of reference samples to draw from the kde
size = int(1e6)
d1 = io.loadfile(args.input_file, 'r')

# We directly recreate the model and prior from the stored
# config to ensure the same configuration
config = d1.read_config_file()

prior = prior_from_config(config)
model = models.read_from_config(config)

# Draw reference samples directly from the kde
draw = model.kde.resample(size=size)
data = {v: draw[i, :] for i, v in enumerate(model.variable_params)}
ref = FieldArray.from_kwargs(**data)

# apply the prior bounds to ensure kde leakage is not a concern
# NOTE(review): only the first parameter of each distribution is bounded
# here, and this relies on the private ``_params`` / ``_bounds`` attributes;
# confirm this is sufficient if multi-parameter distributions are used.
for dist in prior.distributions:
    param = dist._params[0]
    bound = dist._bounds[param]
    ref = ref[(bound.min < ref[param]) & (ref[param] < bound.max)]

nparam = len(model.variable_params)
# squeeze=False always returns a 2D array of axes, so a model with a single
# parameter can be iterated over in the same way as one with several
fig, axs = pylab.subplots(1, nparam, figsize=[6*nparam, 4], dpi=100,
                          squeeze=False)

result = d1.read_samples(model.variable_params)
failed = False
for param, ax in zip(model.variable_params, axs[0]):
    # drawing without replacement cannot return more samples than exist,
    # so cap the request at the number of samples available
    nchoose = min(args.ind_samples, len(result[param]))
    rpart = numpy.random.choice(result[param], replace=False, size=nchoose)
    kv, pvalue = ks_2samp(ref[param], rpart)
    print("{}, p-value={:.3f}".format(param, pvalue))

    pylab.sca(ax)
    pylab.hist(ref[param], density=True, bins=30, label='reference')
    pylab.hist(result[param], density=True, bins=30, alpha=0.5,
               label='sampler')
    pylab.title('KS p-value = {:.4f}'.format(pvalue))
    pylab.xlabel(param)
    pylab.legend()
    ax.get_yaxis().set_visible(False)

    # the threshold is optional; without one the script only makes the plot
    if (args.p_value_threshold is not None
            and pvalue < args.p_value_threshold):
        failed = True

pylab.savefig(args.output_file)
# exit status 1 if any parameter failed the KS test, 0 otherwise
sys.exit(int(failed))
Binary file modified examples/inference/analytic-normal2d/posterior-normal2d.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion pycbc/inference/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


from .analytic import (TestEggbox, TestNormal, TestRosenbrock, TestVolcano,
TestPrior)
TestPrior, TestPosterior)
from .gaussian_noise import GaussianNoise
from .marginalized_gaussian_noise import MarginalizedPhaseGaussianNoise
from .marginalized_gaussian_noise import MarginalizedPolarization
Expand Down Expand Up @@ -182,6 +182,7 @@ def read_from_config(cp, **kwargs):
TestNormal,
TestRosenbrock,
TestVolcano,
TestPosterior,
TestPrior,
GaussianNoise,
MarginalizedPhaseGaussianNoise,
Expand Down
47 changes: 47 additions & 0 deletions pycbc/inference/models/analytic.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
log likelihood.
"""

import logging
import numpy
import numpy.random
from scipy import stats

from .base import BaseModel
Expand Down Expand Up @@ -212,3 +214,48 @@ def _loglikelihood(self):
"""Returns zero.
"""
return 0.


class TestPosterior(BaseModel):
    r"""Build a test posterior from a set of samples using a kde

    Samples for the variable parameters are read from an existing posterior
    file, a random subset of them is selected, and a Gaussian KDE is fit to
    that subset. The log likelihood of the model is the log pdf of the KDE
    evaluated at the current parameters.

    Parameters
    ----------
    variable_params : (tuple of) string(s)
        A tuple of parameter names that will be varied.
    posterior_file : hdf file
        A compatible pycbc inference output file which posterior samples can
        be read from.
    nsamples : int
        Number of samples to draw from posterior file to build KDE.
    **kwargs :
        All other keyword arguments are passed to ``BaseModel``.

    Raises
    ------
    ValueError
        If ``nsamples`` is larger than the number of samples available in
        ``posterior_file``.
    """
    name = "test_posterior"

    def __init__(self, variable_params, posterior_file, nsamples, **kwargs):
        super(TestPosterior, self).__init__(variable_params, **kwargs)

        from pycbc.inference.io import loadfile  # avoid cyclic import
        logging.info('loading test posterior model')
        inf_file = loadfile(posterior_file)
        try:
            logging.info('reading samples')
            # use self.variable_params (as _loglikelihood does) rather than
            # the raw argument, which may have been given as a single string
            samples = inf_file.read_samples(self.variable_params)
            # one row per parameter, one column per sample
            samples = numpy.array([samples[v] for v in self.variable_params])
        finally:
            # the samples are now in memory; do not leave the file open
            inf_file.close()

        # choose only the requested amount of samples
        nsamples = int(nsamples)
        navailable = samples.shape[-1]
        if nsamples > navailable:
            raise ValueError("requested {} samples but the posterior file "
                             "only contains {}".format(nsamples, navailable))
        idx = numpy.random.choice(navailable, size=nsamples, replace=False)
        samples = samples[:, idx]

        logging.info('making kde with %s samples', samples.shape[-1])
        self.kde = stats.gaussian_kde(samples)
        logging.info('done initializing test posterior model')

    def _loglikelihood(self):
        """Returns the log pdf of the test posterior kde
        """
        point = numpy.array([self.current_params[v]
                             for v in self.variable_params])
        logpost = self.kde.logpdf(point)
        # logpdf returns an array with one entry per evaluated point
        return float(logpost[0])

0 comments on commit 8f7e949

Please sign in to comment.