From 926b628bccda5d0ca8ae32db08ec5d826372e0fc Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Sat, 20 Jun 2020 18:09:23 +0200 Subject: [PATCH 01/68] add plugin handling for waveforms (#3319) * add plugin handling for waveforms * cc * cc * add debugging, don't require m1/m2 for all waveforms * add waveform plugin docs * ws * forgot to commit this * Collin's comments * test --- docs/index.rst | 43 +++++++++++---- docs/inference.rst | 5 ++ docs/waveform_plugin.rst | 92 +++++++++++++++++++++++++++++++ examples/waveform/add_waveform.py | 47 ++++++++++++++++ pycbc/waveform/__init__.py | 5 ++ pycbc/waveform/parameters.py | 4 ++ pycbc/waveform/plugin.py | 66 ++++++++++++++++++++++ pycbc/waveform/waveform.py | 55 +++++++++++------- tools/docker_build_dist.sh | 2 +- 9 files changed, 286 insertions(+), 33 deletions(-) create mode 100644 docs/waveform_plugin.rst create mode 100644 examples/waveform/add_waveform.py create mode 100644 pycbc/waveform/plugin.py diff --git a/docs/index.rst b/docs/index.rst index 92903fc1811..d78ae013aed 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -72,16 +72,28 @@ your own build. pip install lalsuite pycbc -Full detailed installation instructions for users who want to use and develop PyCBC are available at: +Full detailed installation instructions which covers other installation cases: .. toctree:: :maxdepth: 1 install -======================= -Documentation for Users -======================= +==================================================== +Parameter Estimation of Gravitational-wave Sources +==================================================== + +Users who want to create and run parameter estimation workflows should read the +documentation at: + +.. toctree:: + :maxdepth: 2 + + inference + +========================================== +Searching for Gravitational-wave Signals +========================================== Users who want to create and run scientific workflows to search for compact binaries should read the documentation in the links at: @@ -93,13 +105,9 @@ binaries should read the documentation in the links at: workflow/pycbc_make_coinc_search_workflow workflow/pygrb.rst -Users who want to create and run parameter estimation workflows should read the -documentation at: - -.. toctree:: - :maxdepth: 2 - - inference +=================================================== +Template Banks, Hardware Injections, and more... +=================================================== Users who are interested in tools that PyCBC provides for various other analysis tasks (e.g. template bank generation, hardware injections, and testing @@ -114,6 +122,19 @@ template banks) should read the documentation at: faithsim upload_to_gracedb +========================================== +Extending PyCBC with external plugins +========================================== + +Would you like to use a waveform model that PyCBC doesn't have? Or maybe +you have your own waveform you'd like to use for a search, parameter estimation +, etc. PyCBC supports a plug-in archictecture for external waveform models. + +.. toctree:: + :maxdepth: 1 + + waveform_plugin + ========================================== Library Examples and Interactive Tutorials ========================================== diff --git a/docs/inference.rst b/docs/inference.rst index 996526449bc..97560ffe376 100644 --- a/docs/inference.rst +++ b/docs/inference.rst @@ -250,6 +250,11 @@ will remain fixed throughout the run. 
For example: approximant = IMRPhenomPv2 f_lower = 18
+In the example above, we choose the waveform model 'IMRPhenomPv2'. PyCBC comes
+with access to waveforms provided by the lalsimulation package. If you'd like
+to use a custom waveform outside of what PyCBC currently supports, see the
+:ref:`documentation on creating a plugin for PyCBC <waveform_plugin>`.
+
 ^^^^^^^^^^^^
 Setting data
 ^^^^^^^^^^^^
diff --git a/docs/waveform_plugin.rst b/docs/waveform_plugin.rst
new file mode 100644
index 00000000000..fcfda531193
--- /dev/null
+++ b/docs/waveform_plugin.rst
@@ -0,0 +1,92 @@
+.. _waveform_plugin:
+
+------------------------------------------------------
+Making new waveform approximants available to PyCBC
+------------------------------------------------------
+
+=================================================
+Adding a custom waveform model within a script
+=================================================
+
+As an example, the following script shows how to write a waveform model
+in the form required by PyCBC. We can also make this new waveform directly
+accessible by using the :py:func:`~pycbc.waveform.plugin.add_custom_waveform`
+function. If you are developing in a notebook or self-contained script, this
+may be all you need. However, if you want to make your waveform available
+to pycbc-based executables such as PyCBC Inference, also read the next
+section.
+
+There are two kinds of models you can make. In the example below, we
+make a time-domain model. You can also make a frequency-domain model. The only
+difference is that your function should return an instance of
+:py:class:`~pycbc.types.frequencyseries.FrequencySeries` and the required
+sample step option is `delta_f` instead of `delta_t`.
+
+Each waveform generation function must take only keyword arguments, and
+should be able to accept an arbitrary number of them. You may add new
+parameters as you like; these will automatically be usable by PyCBC Inference
+and other PyCBC codes.
+
+Each waveform model must have an associated `approximant` name, which
+identifies the model and distinguishes it from any other. If the name is
+already in use, you should select a different one; by default, an error will
+be raised unless this is explicitly overridden.
+
+.. plot:: ../examples/waveform/add_waveform.py
+   :include-source:
+
+=================================================
+Creating a plugin for PyCBC
+=================================================
+
+To make a waveform model universally available to PyCBC, so that it can be
+called from PyCBC Inference or the PyCBC-based search codes, you can create
+a plugin package which advertises your model. PyCBC will automatically
+detect your package and make your waveform model available for use.
+
+The steps are:
+
+ * Create a waveform model just as in the above example
+ * Create a Python package for your module
+ * In your package's setup.py, advertise that it contains a PyCBC-compatible
+   waveform model in its `entry_points` option.
+
+Your `setup.py` should look like the following; the key addition is the
+`entry_points` parameter passed to the `setup` function.
+
+..
code-block:: python
+
+    setup (
+        name = 'pycbc-revchirp',
+        version = VERSION,
+        description = 'An example waveform plugin for PyCBC',
+        long_description = open('descr.rst').read(),
+        author = 'The PyCBC team',
+        author_email = 'alex.nitz@gmail.org',
+        url = 'http://www.pycbc.org/',
+        download_url = 'https://github.com/gwastro/revchirp/tarball/v%s' % VERSION,
+        keywords = ['pycbc', 'signal processing', 'gravitational waves'],
+        install_requires = ['pycbc'],
+        py_modules = ['revchirp'],
+        entry_points = {"pycbc.waveform.td":"revchirp = revchirp:reverse_chirp_td",
+                        "pycbc.waveform.fd":"revchirp = revchirp:reverse_chirp_fd"},
+        classifiers=[
+            'Programming Language :: Python',
+            'Programming Language :: Python :: 2.7',
+            'Programming Language :: Python :: 3.6',
+            'Intended Audience :: Science/Research',
+            'Natural Language :: English',
+            'Topic :: Scientific/Engineering',
+            'Topic :: Scientific/Engineering :: Astronomy',
+            'Topic :: Scientific/Engineering :: Physics',
+            'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+        ],
+    )
+
+The format for the `entry_points` entries is
+`"capability":"approximant_name = module_path:function_name"`. The module
+path may include dots if the module is within a package or sub-package. The
+valid `capability` values are `pycbc.waveform.td` and `pycbc.waveform.fd` for
+time-domain and frequency-domain waveform models, respectively.
+
+For a complete working minimal example of a PyCBC waveform plugin, see the
+example package on github to
+`make a reversed-chirp waveform `_ .
diff --git a/examples/waveform/add_waveform.py b/examples/waveform/add_waveform.py
new file mode 100644
index 00000000000..d2b970b0723
--- /dev/null
+++ b/examples/waveform/add_waveform.py
@@ -0,0 +1,47 @@
+def test_waveform(**args):
+    import numpy
+    from pycbc.types import TimeSeries
+
+    flow = args['f_lower']  # Required parameter
+    dt = args['delta_t']    # Required parameter
+    fpeak = args['fpeak']   # A new parameter for my model
+
+    t = numpy.arange(0, 10, dt)
+    f = t/t.max() * (fpeak - flow) + flow
+    a = t
+
+    wf = numpy.exp(2.0j * numpy.pi * f * t) * a
+
+    # The return value should be a pycbc TimeSeries, in this case one for
+    # each GW polarization
+    #
+    # Note that by convention, the time at 0 is a fiducial reference.
+    # For CBC waveforms, this would be set to where the merger occurs
+    offset = - len(t) * dt
+    wf = TimeSeries(wf, delta_t=dt, epoch=offset)
+    return wf.real(), wf.imag()
+
+import pylab
+import pycbc.waveform
+
+# This tells pycbc about our new waveform so we can call it from standard
+# pycbc functions. If this were a frequency-domain model, select 'frequency'
+# instead of 'time' in this function call.
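+# Note: passing force=True below allows a waveform already registered under
+# the same name to be overwritten (useful when re-running this script in the
+# same session); without it, add_custom_waveform raises a RuntimeError if the
+# name is already in use.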
+pycbc.waveform.add_custom_waveform('test', test_waveform, 'time', force=True) + +# Let's plot what our new waveform looks like +hp, hc = pycbc.waveform.get_td_waveform(approximant="test", + f_lower=20, fpeak=50, + delta_t=1.0/4096) +pylab.figure(0) +pylab.plot(hp.sample_times, hp) +pylab.xlabel('Time (s)') + +pylab.figure(1) +hf = hp.to_frequencyseries() +pylab.plot(hf.sample_frequencies, hf.real()) +pylab.xlabel('Frequency (Hz)') +pylab.xscale('log') +pylab.xlim(20, 100) +pylab.show() diff --git a/pycbc/waveform/__init__.py b/pycbc/waveform/__init__.py index 212d2b22fe6..2e3c228da3a 100644 --- a/pycbc/waveform/__init__.py +++ b/pycbc/waveform/__init__.py @@ -3,3 +3,8 @@ from pycbc.waveform.bank import * from pycbc.waveform.ringdown import * from pycbc.waveform.parameters import * + +from pycbc.waveform.plugin import (retrieve_waveform_plugins, + add_custom_waveform, + add_length_estimator) +retrieve_waveform_plugins() diff --git a/pycbc/waveform/parameters.py b/pycbc/waveform/parameters.py index 7fbd31274bf..d6dca185c39 100644 --- a/pycbc/waveform/parameters.py +++ b/pycbc/waveform/parameters.py @@ -515,6 +515,10 @@ def docstr(self, prefix='', include_label=True): # defined above. Defaults of None simply mean that the value is not passed into # the lal_dict structure and the waveform generator will take whatever default # behaviour +td_required = ParameterList([f_lower, delta_t, approximant]) +fd_required = ParameterList([f_lower, delta_f, approximant]) + +#### cbc_td_required = ParameterList([mass1, mass2, f_lower, delta_t, approximant]) cbc_fd_required = ParameterList([mass1, mass2, f_lower, delta_f, approximant]) diff --git a/pycbc/waveform/plugin.py b/pycbc/waveform/plugin.py new file mode 100644 index 00000000000..43f947212ba --- /dev/null +++ b/pycbc/waveform/plugin.py @@ -0,0 +1,66 @@ +""" Utilities for handling waveform plugins +""" + + +def add_custom_waveform(approximant, function, domain, force=False): + """ Make custom waveform available to pycbc + + Parameters + ---------- + approximant : str + The name of the waveform + function : function + The function to generate the waveform + domain : str + Either 'frequency' or 'time' to indicate the domain of the waveform. 
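+    force : bool, optional
+        If True, overwrite any waveform already registered under the given
+        approximant name instead of raising a RuntimeError. Default is False.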
+ """ + from pycbc.waveform.waveform import cpu_fd, cpu_td + + if domain == 'time': + if not force and (approximant in cpu_td): + raise RuntimeError("Can't load plugin waveform {}, the name is" + " already in use.".format(approximant)) + cpu_td[approximant] = function + elif domain == 'frequency': + if not force and (approximant in cpu_fd): + raise RuntimeError("Can't load plugin waveform {}, the name is" + " already in use.".format(approximant)) + cpu_fd[approximant] = function + else: + raise ValueError("Invalid domain ({}), should be " + "'time' or 'frequency'".format(domain)) + + +def add_length_estimator(approximant, function): + """ Add length estimator for an approximant + + Parameters + ---------- + approximant : str + Name of approximant + function : function + A function which takes kwargs and returns the waveform length + """ + from pycbc.waveform.waveform import _filter_time_lengths + if approximant in _filter_time_lengths: + raise RuntimeError("Can't load length estimator {}, the name is" + " already in use.".format(approximant)) + _filter_time_lengths[approximant] = function + + +def retrieve_waveform_plugins(): + """ Process external waveform plugins + """ + import pkg_resources + + # Check for fd waveforms + for plugin in pkg_resources.iter_entry_points('pycbc.waveform.fd'): + add_custom_waveform(plugin.name, plugin.resolve(), 'frequency') + + # Check for td waveforms + for plugin in pkg_resources.iter_entry_points('pycbc.waveform.td'): + add_custom_waveform(plugin.name, plugin.resolve(), 'time') + + # Check for wavveform length estimates + for plugin in pkg_resources.iter_entry_points('pycbc.waveform.length'): + add_length_estimator(plugin.name, plugin.resolve()) diff --git a/pycbc/waveform/waveform.py b/pycbc/waveform/waveform.py index 5443e08927e..f066a1d152f 100644 --- a/pycbc/waveform/waveform.py +++ b/pycbc/waveform/waveform.py @@ -67,9 +67,6 @@ class FailedWaveformError(Exception): parameters.td_waveform_params).default_dict() default_sgburst_args = {'eccentricity':0, 'polarization':0} - -td_required_args = parameters.cbc_td_required -fd_required_args = parameters.cbc_fd_required sgburst_required_args = ['q','frequency','hrss'] # td, fd, filter waveforms generated on the CPU @@ -141,7 +138,6 @@ def _check_lal_pars(p): return lal_pars def _lalsim_td_waveform(**p): - fail_tolerant_waveform_generation lal_pars = _check_lal_pars(p) #nonGRparams can be straightforwardly added if needed, however they have to # be invoked one by one @@ -184,6 +180,8 @@ def _lalsim_td_waveform(**p): return hp, hc +_lalsim_td_waveform.required = parameters.cbc_td_required + def _spintaylor_aligned_prec_swapper(**p): """ SpinTaylorF2 is only single spin, it also struggles with anti-aligned spin @@ -225,6 +223,8 @@ def _lalsim_fd_waveform(**p): #lal.DestroyDict(lal_pars) return hp, hc +_lalsim_fd_waveform.required = parameters.cbc_fd_required + def _lalsim_sgburst_waveform(**p): hp, hc = lalsimulation.SimBurstSineGaussian(float(p['q']), float(p['frequency']), @@ -352,28 +352,29 @@ def get_obj_attrs(obj): return pr -def props(obj, required_args=None, **kwargs): +def props(obj, **kwargs): """ Return a dictionary built from the combination of defaults, kwargs, and the attributes of the given object. 
""" pr = get_obj_attrs(obj) pr.update(kwargs) - if required_args is None: - required_args = [] - - # check that required args are given - missing = set(required_args) - set(pr.keys()) - if any(missing): - raise ValueError("Please provide {}".format(', '.join(missing))) - # Get the parameters to generate the waveform # Note that keyword arguments override values in the template object input_params = default_args.copy() input_params.update(pr) - return input_params +def check_args(args, required_args): + """ check that required args are given """ + missing = [] + for arg in required_args: + if (arg not in args) or (args[arg] is None): + missing.append(arg) + + if len(missing) != 0: + raise ValueError("Please provide {}".format(', '.join(missing))) + # Input parameter handling for bursts ######################################## def props_sgburst(obj, **kwargs): @@ -418,7 +419,7 @@ def get_fd_waveform_sequence(template=None, **kwds): """ kwds['delta_f'] = -1 kwds['f_lower'] = -1 - p = props(template, required_args=fd_required_args, **kwds) + p = props(template, required_args=parameters.cbc_fd_required, **kwds) lal_pars = _check_lal_pars(p) hp, hc = lalsimulation.SimInspiralChooseFDWaveformSequence(float(p['coa_phase']), @@ -455,12 +456,18 @@ def get_td_waveform(template=None, **kwargs): hcross: TimeSeries The cross polarization of the waveform. """ - input_params = props(template, required_args=td_required_args, **kwargs) + input_params = props(template, **kwargs) wav_gen = td_wav[type(_scheme.mgr.state)] if input_params['approximant'] not in wav_gen: raise ValueError("Approximant %s not available" % (input_params['approximant'])) - return wav_gen[input_params['approximant']](**input_params) + wav_gen = wav_gen[input_params['approximant']] + if hasattr(wav_gen, 'required'): + required = wav_gen.required + else: + required = parameters.td_required + check_args(input_params, required) + return wav_gen(**input_params) get_td_waveform.__doc__ = get_td_waveform.__doc__.format( params=parameters.td_waveform_params.docstr(prefix=" ", @@ -483,8 +490,7 @@ def get_fd_waveform(template=None, **kwargs): hcrosstilde: FrequencySeries The cross phase of the waveform in frequency domain. 
""" - - input_params = props(template, required_args=fd_required_args, **kwargs) + input_params = props(template, **kwargs) wav_gen = fd_wav[type(_scheme.mgr.state)] if input_params['approximant'] not in wav_gen: raise ValueError("Approximant %s not available" % @@ -503,7 +509,13 @@ def get_fd_waveform(template=None, **kwargs): "f_final") except KeyError: pass - return wav_gen[input_params['approximant']](**input_params) + wav_gen = wav_gen[input_params['approximant']] + if hasattr(wav_gen, 'required'): + required = wav_gen.required + else: + required = parameters.fd_required + check_args(input_params, required) + return wav_gen(**input_params) get_fd_waveform.__doc__ = get_fd_waveform.__doc__.format( @@ -1102,4 +1114,5 @@ def get_waveform_filter_length_in_time(approximant, template=None, **kwargs): "get_waveform_filter_length_in_time", "get_sgburst_waveform", "print_sgburst_approximants", "sgburst_approximants", "td_waveform_to_fd_waveform", "get_two_pol_waveform_filter", - "NoWaveformError", "FailedWaveformError", "get_td_waveform_from_fd"] + "NoWaveformError", "FailedWaveformError", "get_td_waveform_from_fd", + 'cpu_fd', 'cpu_td', '_filter_time_lengths'] diff --git a/tools/docker_build_dist.sh b/tools/docker_build_dist.sh index 5baab12e882..a64c8cbeaa3 100755 --- a/tools/docker_build_dist.sh +++ b/tools/docker_build_dist.sh @@ -56,7 +56,7 @@ if [ "x${PYCBC_CONTAINER}" == "xpycbc_rhel_virtualenv" ]; then yum -y install ligo-proxy-utils yum -y install ecp-cookie-init yum -y install python-virtualenv - yum -y install hdf5-static libxml2-static zlib-static libstdc++-static cfitsio-static glibc-static fftw-static gsl-static + yum -y install hdf5-static libxml2-static zlib-static libstdc++-static cfitsio-static glibc-static fftw-static gsl-static --skip-broken CVMFS_PATH=/cvmfs/oasis.opensciencegrid.org/ligo/sw/pycbc/${ENV_OS}/virtualenv mkdir -p ${CVMFS_PATH} From 4f75faeded2cb284dedbc856a8b2ae56075ea158 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Sat, 20 Jun 2020 20:27:09 +0200 Subject: [PATCH 02/68] Allow different burn-in iteration and ACL for independent MCMC chains (#3311) * use different acl for every chain in epsie * create base burn in class, move common functions to there; rename MCMCBurnInTests EnsembleMCMC, first stab at creating MCMC tests for independent chains * more changes to burn in module * simplify the attributes in the burn in classes * add write method to burn in classes * add write_data method to base_hdf * remove write_burn_in method from mcmc io; use the write method in burn in module instead * make use of new burn in functions in sampler/base_mcmc * have emcee and emcee pt use ensemble burn in tests * add compute_acf function to epsie * start separating ensemble and mcmc io methods * stop saving thin settings to file; just return on the fly * make read/write samples stand alone functions, and update emcee * rename write functions; update emcee * move multi temper read/write functions to stand alone and update emcee_pt * pass kwargs from emcee(_pt) io functions * simplify get_slice method * add function to base_mcmc to calculate the number of samples in a chain * use nsamples_in_chain function to calculate effective number of samples * add read_raw_samples function that can handle differing number of samples from different chains * add forgotten import * use write/read functions from base_multitemper in epsie io * use stand alone functions for computing ensemble acf/acls * separate out ensemble-specific attributes in sampler module; update emcee and emcee_pt * 
add acl and effective_nsample methods to epsie * simplify writing acls and burn in * fix various bugs and typos * use a single function for writing both acl and raw_acls * add some more logging info to burn in * reduce identical blocks of code in burn in module * fix self -> fp in read_raw_samples * reduce code duplication in base io and simplify read raw samples function * fix missed rename * reduce code redundacy in sampler/base_multitemper * whitespace * fix bugs and typos in burn_in module * fix code climate issues * use map in compute_acl * more code climate fixes * remove unused variable; try to silence pylint * fix issues reading epsie samples * only load samples from burned in chains by default * add act property to mcmc files * fix act logging message * fix effective number of samples calculation in epsie * remap walkers option to chains for reading samples * fix thinning update * fix acceptance ratio and temperature data thinning in epsie * allow for different fields to have differing number of temperatures when loading * don't try to figure out how many samples will be loaded ahead of time * store acts in file instead of acls * write burn in status to file before computing acls * drop write_acts function * fix issue with getting specific chains * fix typo * code climate issues * fix plot_acl --- bin/inference/pycbc_inference_plot_acl | 7 +- pycbc/inference/burn_in.py | 591 +++++++++++--- pycbc/inference/io/base_hdf.py | 131 ++-- pycbc/inference/io/base_mcmc.py | 805 ++++++++++++++------ pycbc/inference/io/base_multitemper.py | 463 +++++++---- pycbc/inference/io/emcee.py | 43 +- pycbc/inference/io/emcee_pt.py | 45 +- pycbc/inference/io/epsie.py | 75 +- pycbc/inference/sampler/base_mcmc.py | 418 +++++----- pycbc/inference/sampler/base_multitemper.py | 438 +++++++---- pycbc/inference/sampler/emcee.py | 61 +- pycbc/inference/sampler/emcee_pt.py | 64 +- pycbc/inference/sampler/epsie.py | 95 ++- 13 files changed, 2313 insertions(+), 923 deletions(-) diff --git a/bin/inference/pycbc_inference_plot_acl b/bin/inference/pycbc_inference_plot_acl index 8f20e0c8302..348dbb10763 100644 --- a/bin/inference/pycbc_inference_plot_acl +++ b/bin/inference/pycbc_inference_plot_acl @@ -78,14 +78,11 @@ for param_name in parameters: logging.info("Plotting autocorrelation times") plt.hist(acls, opts.bins, histtype="step", label=labels[param_name]) -# get the file's acl -fpacl = fp.thinned_by * fp[fp.sampler_group].attrs['acl'] - plt.xlabel("Autocorrelation time") plt.ylabel(r'Number of walkers') -# plot autocorrelation length saved in hdf file -plt.axvline(fpacl, linestyle='--') +# plot autocorrelation time saved in hdf file +plt.axvline(fp.act, linestyle='--') plt.legend() # save figure with meta-data diff --git a/pycbc/inference/burn_in.py b/pycbc/inference/burn_in.py index a6092d7f78d..23196cdb8cc 100644 --- a/pycbc/inference/burn_in.py +++ b/pycbc/inference/burn_in.py @@ -28,6 +28,9 @@ from __future__ import division +import logging +from abc import ABCMeta, abstractmethod +from six import add_metaclass import numpy from scipy.stats import ks_2samp @@ -149,6 +152,91 @@ def posterior_step(logposts, dim): return idx +def nacl(nsamples, acls, nacls=5): + """Burn in based on ACL. + + This applies the following test to determine burn in: + + 1. The first half of the chain is ignored. + + 2. An ACL is calculated from the second half. + + 3. If ``nacls`` times the ACL is < the length of the chain / 2, + the chain is considered to be burned in at the half-way point. 
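+
+    For example, with ``nsamples = 10000`` and the default ``nacls = 5``, a
+    parameter whose ACL is 900 passes the test (5 * 900 = 4500 < 5000), while
+    a parameter whose ACL is 1100 does not (5 * 1100 = 5500).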
+ + Parameters + ---------- + nsamples : int + The number of samples of in the chain(s). + acls : dict + Dictionary of parameter -> ACL(s). The ACLs for each parameter may + be an integer or an array of integers (for multiple chains). + nacls : int, optional + The number of ACLs the chain(s) must have gone past the halfway point + in order to be considered burned in. Default is 5. + + Returns + ------- + dict + Dictionary of parameter -> boolean(s) indicating if the chain(s) pass + the test. If an array of values was provided for the acls, the values + will be arrays of booleans. + """ + kstart = int(nsamples / 2.) + return {param: (nacls * acl) < kstart for (param, acl) in acls.items()} + + +def evaluate_tests(burn_in_test, test_is_burned_in, test_burn_in_iter): + """Evaluates burn in data from multiple tests. + + The iteration to use for burn-in depends on the logic in the burn-in + test string. For example, if the test was 'max_posterior | nacl' and + max_posterior burned-in at iteration 5000 while nacl burned in at + iteration 6000, we'd want to use 5000 as the burn-in iteration. + However, if the test was 'max_posterior & nacl', we'd want to use + 6000 as the burn-in iteration. This function handles all cases by + doing the following: first, take the collection of burn in iterations + from all the burn in tests that were applied. Next, cycle over the + iterations in increasing order, checking which tests have burned in + by that point. Then evaluate the burn-in string at that point to see + if it passes, and if so, what the iteration is. The first point that + the test passes is used as the burn-in iteration. + + Parameters + ---------- + burn_in_test : str + The test to apply; e.g., ``'max_posterior & nacl'``. + test_is_burned_in : dict + Dictionary of test name -> boolean indicating whether a specific burn + in test has passed. + test_burn_in_iter : dict + Dictionary of test name -> int indicating when a specific test burned + in. + + Returns + ------- + is_burned_in : bool + Whether or not the data passes all burn in tests. + burn_in_iteration : + The iteration at which all the tests pass. If the tests did not all + pass (``is_burned_in`` is false), then returns + :py:data:`NOT_BURNED_IN_ITER`. 
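+
+    For example, if ``burn_in_test = 'max_posterior & nacl'``,
+    ``test_is_burned_in = {'max_posterior': True, 'nacl': True}``, and
+    ``test_burn_in_iter = {'max_posterior': 5000, 'nacl': 6000}``, the
+    conjunction is only satisfied once both tests have burned in, so this
+    returns ``(True, 6000)``; with ``'max_posterior | nacl'`` it would
+    return ``(True, 5000)`` instead.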
+ """ + burn_in_iters = numpy.unique(list(test_burn_in_iter.values())) + burn_in_iters.sort() + for ii in burn_in_iters: + test_results = {t: (test_is_burned_in[t] & + 0 <= test_burn_in_iter[t] <= ii) + for t in test_is_burned_in} + is_burned_in = eval(burn_in_test, {"__builtins__": None}, + test_results) + if is_burned_in: + break + if not is_burned_in: + ii = NOT_BURNED_IN_ITER + return is_burned_in, ii + + # # ============================================================================= # @@ -158,32 +246,42 @@ def posterior_step(logposts, dim): # -class MCMCBurnInTests(object): - """Provides methods for estimating burn-in of an ensemble MCMC.""" - +@add_metaclass(ABCMeta) +class BaseBurnInTests(object): + """Base class for burn in tests.""" available_tests = ('halfchain', 'min_iterations', 'max_posterior', - 'posterior_step', 'nacl', 'ks_test', + 'posterior_step', 'nacl', ) + # pylint: disable=unnecessary-pass + def __init__(self, sampler, burn_in_test, **kwargs): self.sampler = sampler # determine the burn-in tests that are going to be done self.do_tests = get_vars_from_arg(burn_in_test) self.burn_in_test = burn_in_test - self.burn_in_data = {t: {} for t in self.do_tests} self.is_burned_in = False self.burn_in_iteration = NOT_BURNED_IN_ITER - self.burn_in_index = NOT_BURNED_IN_ITER + self.test_is_burned_in = {} # burn in status per test + self.test_burn_in_iteration = {} # burn in iter per test + self.test_aux_info = {} # any additional information the test stores # Arguments specific to each test... # for nacl: self._nacls = int(kwargs.pop('nacls', 5)) - # for kstest: - self._ksthreshold = float(kwargs.pop('ks_threshold', 0.9)) # for max_posterior and posterior_step self._ndim = int(kwargs.pop('ndim', len(sampler.variable_params))) # for min iterations self._min_iterations = int(kwargs.pop('min_iterations', 0)) + @abstractmethod + def burn_in_index(self, filename): + """The burn in index (retrieved from the iteration). + + This is an abstract method because how this is evaluated depends on + if this is an ensemble MCMC or not. + """ + pass + def _getniters(self, filename): """Convenience function to get the number of iterations in the file. @@ -256,87 +354,360 @@ def _getacls(self, filename, start_index): # since we calculated it, save the acls to the sampler... # but only do this if this is the only burn in test if len(self.do_tests) == 1: - self.sampler.acls = acls + self.sampler.raw_acls = acls return acls + def _getaux(self, test): + """Convenience function for getting auxilary information. + + Parameters + ---------- + test : str + The name of the test to retrieve auxilary information about. + + Returns + ------- + dict + The ``test_aux_info[test]`` dictionary. If a dictionary does + not exist yet for the given test, an empty dictionary will be + created and saved to ``test_aux_info[test]``. + """ + try: + aux = self.test_aux_info[test] + except KeyError: + aux = self.test_aux_info[test] = {} + return aux + def halfchain(self, filename): """Just uses half the chain as the burn-in iteration. """ niters = self._getniters(filename) - data = self.burn_in_data['halfchain'] # this test cannot determine when something will burn in # only when it was not burned in in the past - data['is_burned_in'] = True - data['burn_in_iteration'] = niters/2 + self.test_is_burned_in['halfchain'] = True + self.test_burn_in_iteration['halfchain'] = niters//2 def min_iterations(self, filename): """Just checks that the sampler has been run for the minimum number of iterations. 
""" niters = self._getniters(filename) - data = self.burn_in_data['min_iterations'] - data['is_burned_in'] = self._min_iterations < niters - if data['is_burned_in']: - data['burn_in_iteration'] = self._min_iterations + is_burned_in = self._min_iterations < niters + if is_burned_in: + burn_in_iter = self._min_iterations else: - data['burn_in_iteration'] = NOT_BURNED_IN_ITER + burn_in_iter = NOT_BURNED_IN_ITER + self.test_is_burned_in['min_iterations'] = is_burned_in + self.test_burn_in_iteration['min_iterations'] = burn_in_iter + @abstractmethod def max_posterior(self, filename): - """Applies max posterior test to self.""" + """Carries out the max posterior test and stores the results.""" + pass + + @abstractmethod + def posterior_step(self, filename): + """Carries out the posterior step test and stores the results.""" + pass + + @abstractmethod + def nacl(self, filename): + """Carries out the nacl test and stores the results.""" + pass + + @abstractmethod + def evaluate(self, filename): + """Performs all tests and evaluates the results to determine if and + when all tests pass. + """ + pass + + def write(self, fp, path=None): + """Writes burn-in info to an open HDF file. + + Parameters + ---------- + fp : pycbc.inference.io.base.BaseInferenceFile + Open HDF file to write the data to. The HDF file should be an + instance of a pycbc BaseInferenceFile. + path : str, optional + Path in the HDF file to write the data to. Default is (None) is + to write to the path given by the file's ``sampler_group`` + attribute. + """ + if path is None: + path = fp.sampler_group + fp.write_data('burn_in_test', self.burn_in_test, path) + fp.write_data('is_burned_in', self.is_burned_in, path) + fp.write_data('burn_in_iteration', self.burn_in_iteration, path) + testgroup = 'burn_in_tests' + # write individual test data + for tst in self.do_tests: + subpath = '/'.join([path, testgroup, tst]) + fp.write_data('is_burned_in', self.test_is_burned_in[tst], subpath) + fp.write_data('burn_in_iteration', + self.test_burn_in_iteration[tst], + subpath) + # write auxiliary info + if tst in self.test_aux_info: + for name, data in self.test_aux_info[tst].items(): + fp.write_data(name, data, subpath) + + @staticmethod + def _extra_tests_from_config(cp, section, tag): + """For loading class-specific tests.""" + # pylint: disable=unused-argument + return {} + + @classmethod + def from_config(cls, cp, sampler): + """Loads burn in from section [sampler-burn_in].""" + section = 'sampler' + tag = 'burn_in' + burn_in_test = cp.get_opt_tag(section, 'burn-in-test', tag) + kwargs = {} + if cp.has_option_tag(section, 'nacl', tag): + kwargs['nacl'] = int(cp.get_opt_tag(section, 'nacl', tag)) + if cp.has_option_tag(section, 'ndim', tag): + kwargs['ndim'] = int( + cp.get_opt_tag(section, 'ndim', tag)) + if cp.has_option_tag(section, 'min-iterations', tag): + kwargs['min_iterations'] = int( + cp.get_opt_tag(section, 'min-iterations', tag)) + # load any class specific tests + kwargs.update(cls._extra_tests_from_config(cp, section, tag)) + return cls(sampler, burn_in_test, **kwargs) + + +class MCMCBurnInTests(BaseBurnInTests): + """Burn-in tests for collections of independent MCMC chains. + + This differs from EnsembleMCMCBurnInTests in that chains are treated as + being independent of each other. The ``is_burned_in`` attribute will be + True if `any` chain passes the burn in tests (whereas in MCMCBurnInTests, + all chains must pass the burn in tests). 
In other words, independent + samples can be collected even if all of the chains are not burned in. + """ + def __init__(self, sampler, burn_in_test, **kwargs): + super(MCMCBurnInTests, self).__init__(sampler, burn_in_test, **kwargs) + try: + nchains = sampler.nchains + except AttributeError: + nchains = sampler.nwalkers + self.nchains = nchains + self.is_burned_in = numpy.zeros(self.nchains, dtype=bool) + self.burn_in_iteration = numpy.repeat(NOT_BURNED_IN_ITER, self.nchains) + + def burn_in_index(self, filename): + """The burn in index (retrieved from the iteration).""" + burn_in_index = self._iter2index(filename, self.burn_in_iteration) + # don't set if it isn't burned in + burn_in_index[~self.is_burned_in] = NOT_BURNED_IN_ITER + return burn_in_index + + def max_posterior(self, filename): + """Applies max posterior test.""" logposts = self._getlogposts(filename) burn_in_idx, is_burned_in = max_posterior(logposts, self._ndim) - data = self.burn_in_data['max_posterior'] - # required things to store - data['is_burned_in'] = is_burned_in.all() - if data['is_burned_in']: - data['burn_in_iteration'] = self._index2iter( - filename, burn_in_idx.max()) - else: - data['burn_in_iteration'] = NOT_BURNED_IN_ITER - # additional info - data['iteration_per_walker'] = self._index2iter(filename, burn_in_idx) - data['status_per_walker'] = is_burned_in + # convert index to iterations + burn_in_iter = self._index2iter(filename, burn_in_idx) + burn_in_iter[~is_burned_in] = NOT_BURNED_IN_ITER + # save + test = 'max_posterior' + self.test_is_burned_in[test] = is_burned_in + self.test_burn_in_iteration[test] = burn_in_iter def posterior_step(self, filename): """Applies the posterior-step test.""" logposts = self._getlogposts(filename) burn_in_idx = numpy.array([posterior_step(logps, self._ndim) for logps in logposts]) - data = self.burn_in_data['posterior_step'] # this test cannot determine when something will burn in # only when it was not burned in in the past - data['is_burned_in'] = True - data['burn_in_iteration'] = self._index2iter( - filename, burn_in_idx.max()) - # additional info - data['iteration_per_walker'] = self._index2iter(filename, burn_in_idx) + test = 'posterior_step' + if test not in self.test_is_burned_in: + self.test_is_burned_in[test] = numpy.ones(self.nchains, dtype=bool) + # convert index to iterations + self.test_burn_in_iteration[test] = self._index2iter(filename, + burn_in_idx) def nacl(self, filename): - """Burn in based on ACL. 
+ """Applies the :py:func:`nacl` test.""" + nsamples = self._getnsamples(filename) + acls = self._getacls(filename, start_index=nsamples//2) + is_burned_in = nacl(nsamples, acls, self._nacls) + # stack the burn in results into an nparams x nchains array + burn_in_per_chain = numpy.stack(list(is_burned_in.values())).all( + axis=0) + # store + test = 'nacl' + self.test_is_burned_in[test] = burn_in_per_chain + try: + burn_in_iter = self.test_burn_in_iteration[test] + except KeyError: + # hasn't been stored yet + burn_in_iter = numpy.repeat(NOT_BURNED_IN_ITER, self.nchains) + self.test_burn_in_iteration[test] = burn_in_iter + burn_in_iter[burn_in_per_chain] = self._index2iter(filename, + nsamples//2) + # add the status for each parameter as additional information + self.test_aux_info[test] = is_burned_in - This applies the following test to determine burn in: + def evaluate(self, filename): + """Runs all of the burn-in tests.""" + # evaluate all the tests + for tst in self.do_tests: + logging.info("Evaluating %s burn-in test", tst) + getattr(self, tst)(filename) + # evaluate each chain at a time + for ci in range(self.nchains): + # some tests (like halfchain) just store a single bool for all + # chains + tibi = {t: r[ci] if isinstance(r, numpy.ndarray) else r + for t, r in self.test_is_burned_in.items()} + tbi = {t: r[ci] if isinstance(r, numpy.ndarray) else r + for t, r in self.test_burn_in_iteration.items()} + is_burned_in, burn_in_iter = evaluate_tests(self.burn_in_test, + tibi, tbi) + self.is_burned_in[ci] = is_burned_in + self.burn_in_iteration[ci] = burn_in_iter + logging.info("Number of chains burned in: %i of %i", + self.is_burned_in.sum(), self.nchains) + + def write(self, fp, path=None): + """Writes burn-in info to an open HDF file. - 1. The first half of the chain is ignored. + Parameters + ---------- + fp : pycbc.inference.io.base.BaseInferenceFile + Open HDF file to write the data to. The HDF file should be an + instance of a pycbc BaseInferenceFile. + path : str, optional + Path in the HDF file to write the data to. Default is (None) is + to write to the path given by the file's ``sampler_group`` + attribute. + """ + if path is None: + path = fp.sampler_group + super(MCMCBurnInTests, self).write(fp, path) + # add number of chains burned in as additional metadata + fp.write_data('nchains_burned_in', self.is_burned_in.sum(), path) - 2. An ACL is calculated from the second half. - 3. If ``nacls`` times the ACL is < the length of the chain / 2, - the chain is considered to be burned in at the half-way point. +class MultiTemperedMCMCBurnInTests(MCMCBurnInTests): + """Adds support for multiple temperatures to + :py:class:`MCMCBurnInTests`. + """ + + def _getacls(self, filename, start_index): + """Convenience function for calculating acls for the given filename. + + This function is used by the ``n_acl`` burn-in test. That function + expects the returned ``acls`` dict to just report a single ACL for + each parameter. Since multi-tempered samplers return an array of ACLs + for each parameter instead, this takes the max over the array before + returning. + + Since we calculate the acls, this will also store it to the sampler. + + Parameters + ---------- + filename : str + Name of the file to retrieve samples from. + start_index : int + Index to start calculating ACLs. + + Returns + ------- + dict : + Dictionary of parameter names -> array giving ACL for each chain. 
""" + acls = super(MultiTemperedMCMCBurnInTests, self)._getacls( + filename, start_index) + # acls will have shape ntemps x nchains, flatten to nchains + return {param: vals.max(axis=0) for (param, vals) in acls.items()} + + def _getlogposts(self, filename): + """Convenience function for retrieving log posteriors. + + This just gets the coldest temperature chain, and returns arrays with + shape nwalkers x niterations, so the parent class can run the same + ``posterior_step`` function. + """ + return _multitemper_getlogposts(self.sampler, filename) + + +class EnsembleMCMCBurnInTests(BaseBurnInTests): + """Provides methods for estimating burn-in of an ensemble MCMC.""" + + available_tests = ('halfchain', 'min_iterations', 'max_posterior', + 'posterior_step', 'nacl', 'ks_test', + ) + + def __init__(self, sampler, burn_in_test, **kwargs): + super(EnsembleMCMCBurnInTests, self).__init__( + sampler, burn_in_test, **kwargs) + # for kstest + self._ksthreshold = float(kwargs.pop('ks_threshold', 0.9)) + + def burn_in_index(self, filename): + """The burn in index (retrieved from the iteration).""" + if self.is_burned_in: + index = self._iter2index(filename, self.burn_in_iteration) + else: + index = NOT_BURNED_IN_ITER + return index + + def max_posterior(self, filename): + """Applies max posterior test to self.""" + logposts = self._getlogposts(filename) + burn_in_idx, is_burned_in = max_posterior(logposts, self._ndim) + all_burned_in = is_burned_in.all() + if all_burned_in: + burn_in_iter = self._index2iter(filename, burn_in_idx.max()) + else: + burn_in_iter = NOT_BURNED_IN_ITER + # store + test = 'max_posterior' + self.test_is_burned_in[test] = all_burned_in + self.test_burn_in_iteration[test] = burn_in_iter + aux = self._getaux(test) + # additional info + aux['iteration_per_walker'] = self._index2iter(filename, burn_in_idx) + aux['status_per_walker'] = is_burned_in + + def posterior_step(self, filename): + """Applies the posterior-step test.""" + logposts = self._getlogposts(filename) + burn_in_idx = numpy.array([posterior_step(logps, self._ndim) + for logps in logposts]) + burn_in_iters = self._index2iter(filename, burn_in_idx) + # this test cannot determine when something will burn in + # only when it was not burned in in the past + test = 'posterior_step' + self.test_is_burned_in[test] = True + self.test_burn_in_iteration[test] = burn_in_iters.max() + # store the iteration per walker as additional info + aux = self._getaux(test) + aux['iteration_per_walker'] = burn_in_iters + + def nacl(self, filename): + """Applies the :py:func:`nacl` test.""" nsamples = self._getnsamples(filename) - kstart = int(nsamples / 2.) 
- acls = self._getacls(filename, start_index=kstart) - is_burned_in = {param: (self._nacls * acl) < kstart - for (param, acl) in acls.items()} - data = self.burn_in_data['nacl'] - # required things to store - data['is_burned_in'] = all(is_burned_in.values()) - if data['is_burned_in']: - data['burn_in_iteration'] = self._index2iter(filename, kstart) + acls = self._getacls(filename, start_index=nsamples//2) + is_burned_in = nacl(nsamples, acls, self._nacls) + all_burned_in = all(is_burned_in.values()) + if all_burned_in: + burn_in_iter = self._index2iter(filename, nsamples//2) else: - data['burn_in_iteration'] = NOT_BURNED_IN_ITER - # additional information - data['status_per_parameter'] = is_burned_in + burn_in_iter = NOT_BURNED_IN_ITER + # store + test = 'nacl' + self.test_is_burned_in[test] = all_burned_in + self.test_burn_in_iteration[test] = burn_in_iter + # store the status per parameter as additional info + aux = self._getaux(test) + aux['status_per_parameter'] = is_burned_in def ks_test(self, filename): """Applies ks burn-in test.""" @@ -352,76 +723,49 @@ def ks_test(self, filename): # is_the_same is a dictionary of params --> bool indicating whether or # not the 1D marginal is the same at the half way point is_the_same = ks_test(samples1, samples2, threshold=self._ksthreshold) - data = self.burn_in_data['ks_test'] - # required things to store - data['is_burned_in'] = all(is_the_same.values()) - if data['is_burned_in']: - data['burn_in_iteration'] = self._index2iter( - filename, int(nsamples/2.)) + is_burned_in = all(is_the_same.values()) + if is_burned_in: + burn_in_iter = self._index2iter(filename, int(nsamples//2)) else: - data['burn_in_iteration'] = NOT_BURNED_IN_ITER - # additional - data['status_per_parameter'] = is_the_same + burn_in_iter = NOT_BURNED_IN_ITER + # store + test = 'ks_test' + self.test_is_burned_in[test] = is_burned_in + self.test_burn_in_iteration[test] = burn_in_iter + # store the test per parameter as additional info + aux = self._getaux(test) + aux['status_per_parameter'] = is_the_same def evaluate(self, filename): """Runs all of the burn-in tests.""" + # evaluate all the tests for tst in self.do_tests: + logging.info("Evaluating %s burn-in test", tst) getattr(self, tst)(filename) - # The iteration to use for burn-in depends on the logic in the burn-in - # test string. For example, if the test was 'max_posterior | nacl' and - # max_posterior burned-in at iteration 5000 while nacl burned in at - # iteration 6000, we'd want to use 5000 as the burn-in iteration. - # However, if the test was 'max_posterior & nacl', we'd want to use - # 6000 as the burn-in iteration. The code below handles all cases by - # doing the following: first, take the collection of burn in iterations - # from all the burn in tests that were applied. Next, cycle over the - # iterations in increasing order, checking which tests have burned in - # by that point. Then evaluate the burn-in string at that point to see - # if it passes, and if so, what the iteration is. The first point that - # the test passes is used as the burn-in iteration. 
- data = self.burn_in_data - burn_in_iters = numpy.unique([data[t]['burn_in_iteration'] - for t in self.do_tests]) - burn_in_iters.sort() - for ii in burn_in_iters: - test_results = {t: (data[t]['is_burned_in'] & - 0 <= data[t]['burn_in_iteration'] <= ii) - for t in self.do_tests} - is_burned_in = eval(self.burn_in_test, {"__builtins__": None}, - test_results) - if is_burned_in: - break + is_burned_in, burn_in_iter = evaluate_tests( + self.burn_in_test, self.test_is_burned_in, + self.test_burn_in_iteration) self.is_burned_in = is_burned_in - if is_burned_in: - self.burn_in_iteration = ii - self.burn_in_index = self._iter2index(filename, ii) - else: - self.burn_in_iteration = NOT_BURNED_IN_ITER - self.burn_in_index = NOT_BURNED_IN_ITER - - @classmethod - def from_config(cls, cp, sampler): - """Loads burn in from section [sampler-burn_in].""" - section = 'sampler' - tag = 'burn_in' - burn_in_test = cp.get_opt_tag(section, 'burn-in-test', tag) + self.burn_in_iteration = burn_in_iter + logging.info("Is burned in: %r", self.is_burned_in) + if self.is_burned_in: + logging.info("Burn-in iteration: %i", + int(self.burn_in_iteration)) + + @staticmethod + def _extra_tests_from_config(cp, section, tag): + """Loads the ks test settings from the config file.""" kwargs = {} - if cp.has_option_tag(section, 'nacl', tag): - kwargs['nacl'] = int(cp.get_opt_tag(section, 'nacl', tag)) if cp.has_option_tag(section, 'ks-threshold', tag): kwargs['ks_threshold'] = float( cp.get_opt_tag(section, 'ks-threshold', tag)) - if cp.has_option_tag(section, 'ndim', tag): - kwargs['ndim'] = int( - cp.get_opt_tag(section, 'ndim', tag)) - if cp.has_option_tag(section, 'min-iterations', tag): - kwargs['min_iterations'] = int( - cp.get_opt_tag(section, 'min-iterations', tag)) - return cls(sampler, burn_in_test, **kwargs) + return kwargs -class MultiTemperedMCMCBurnInTests(MCMCBurnInTests): - """Adds support for multiple temperatures to the MCMCBurnInTests.""" +class EnsembleMultiTemperedMCMCBurnInTests(EnsembleMCMCBurnInTests): + """Adds support for multiple temperatures to + :py:class:`EnsembleMCMCBurnInTests`. + """ def _getacls(self, filename, start_index): """Convenience function for calculating acls for the given filename. @@ -434,7 +778,7 @@ def _getacls(self, filename, start_index): Since we calculate the acls, this will also store it to the sampler. """ - acls = super(MultiTemperedMCMCBurnInTests, self)._getacls( + acls = super(EnsembleMultiTemperedMCMCBurnInTests, self)._getacls( filename, start_index) # return the max for each parameter return {param: vals.max() for (param, vals) in acls.items()} @@ -446,13 +790,18 @@ def _getlogposts(self, filename): shape nwalkers x niterations, so the parent class can run the same ``posterior_step`` function. 
""" - with self.sampler.io(filename, 'r') as fp: - samples = fp.read_raw_samples( - ['loglikelihood', 'logprior'], thin_start=0, thin_interval=1, - temps=0, flatten=False) - # reshape to drop the first dimension - for (stat, arr) in samples.items(): - _, nwalkers, niterations = arr.shape - samples[stat] = arr.reshape((nwalkers, niterations)) - logposts = samples['loglikelihood'] + samples['logprior'] - return logposts + return _multitemper_getlogposts(self.sampler, filename) + + +def _multitemper_getlogposts(sampler, filename): + """Retrieve log posteriors for multi tempered samplers.""" + with sampler.io(filename, 'r') as fp: + samples = fp.read_raw_samples( + ['loglikelihood', 'logprior'], thin_start=0, thin_interval=1, + temps=0, flatten=False) + # reshape to drop the first dimension + for (stat, arr) in samples.items(): + _, nwalkers, niterations = arr.shape + samples[stat] = arr.reshape((nwalkers, niterations)) + logposts = samples['loglikelihood'] + samples['logprior'] + return logposts diff --git a/pycbc/inference/io/base_hdf.py b/pycbc/inference/io/base_hdf.py index 6014543ed17..c7b4f38795f 100644 --- a/pycbc/inference/io/base_hdf.py +++ b/pycbc/inference/io/base_hdf.py @@ -347,69 +347,25 @@ def write_effective_nsamples(self, effective_nsamples): def thin_start(self): """The default start index to use when reading samples. - This tries to read from ``thin_start`` in the ``attrs``. If it isn't - there, just returns 0.""" - try: - return self.attrs['thin_start'] - except KeyError: - return 0 - - @thin_start.setter - def thin_start(self, thin_start): - """Sets the thin start attribute. - - Parameters - ---------- - thin_start : int or None - Value to set the thin start to. + Unless overridden by sub-class attribute, just returns 0. """ - self.attrs['thin_start'] = thin_start + return 0 @property def thin_interval(self): """The default interval to use when reading samples. - This tries to read from ``thin_interval`` in the ``attrs``. If it - isn't there, just returns 1. - """ - try: - return self.attrs['thin_interval'] - except KeyError: - return 1 - - @thin_interval.setter - def thin_interval(self, thin_interval): - """Sets the thin start attribute. - - Parameters - ---------- - thin_interval : int or None - Value to set the thin interval to. + Unless overridden by sub-class attribute, just returns 1. """ - self.attrs['thin_interval'] = thin_interval + return 1 @property def thin_end(self): """The defaut end index to use when reading samples. - This tries to read from ``thin_end`` in the ``attrs``. If it isn't - there, just returns None. - """ - try: - return self.attrs['thin_end'] - except KeyError: - return None - - @thin_end.setter - def thin_end(self, thin_end): - """Sets the thin end attribute. - - Parameters - ---------- - thin_end : int or None - Value to set the thin end to. + Unless overriden by sub-class attribute, just return ``None``. """ - self.attrs['thin_end'] = thin_end + return None @property def cmd(self): @@ -615,37 +571,29 @@ def write_command_line(self): previous = [] self.attrs["cmd"] = cmd + previous - def get_slice(self, thin_start=None, thin_interval=None, thin_end=None): - """Formats a slice using the given arguments that can be used to - retrieve a thinned array from an InferenceFile. + @staticmethod + def get_slice(thin_start=None, thin_interval=None, thin_end=None): + """Formats a slice to retrieve a thinned array from an HDF file. Parameters ---------- - thin_start : int, optional - The starting index to use. 
If None, will use the ``thin_start`` - attribute. - thin_interval : int, optional - The interval to use. If None, will use the ``thin_interval`` - attribute. - thin_end : int, optional - The end index to use. If None, will use the ``thin_end`` attribute. + thin_start : float or int, optional + The starting index to use. If provided, the ``int`` will be taken. + thin_interval : float or int, optional + The interval to use. If provided the ceiling of it will be taken. + thin_end : float or int, optional + The end index to use. If provided, the ``int`` will be taken. Returns ------- slice : The slice needed. """ - if thin_start is None: - thin_start = int(self.thin_start) - else: + if thin_start is not None: thin_start = int(thin_start) - if thin_interval is None: - thin_interval = self.thin_interval - else: + if thin_interval is not None: thin_interval = int(numpy.ceil(thin_interval)) - if thin_end is None: - thin_end = self.thin_end - else: + if thin_end is not None: thin_end = int(thin_end) return slice(thin_start, thin_end, thin_interval) @@ -811,3 +759,46 @@ def write_kwargs_to_attrs(cls, attrs, **kwargs): cls.write_kwargs_to_attrs(attrs, **val) else: attrs[arg] = val + + def write_data(self, name, data, path=None): + """Convenience function to write data. + + Given ``data`` is written as a dataset with ``name`` in ``path``. + If the data hasn't been written yet, the dataset will be created. + Otherwise, will overwrite the data that is there. If data already + exists in the file with the same name and path, the given data must + have the same shape. + + Parameters + ---------- + name : str + The name to associate with the data. This will be the dataset + name (if data is array-like) or the key in the attrs. + data : array, dict, or atomic + The data to write. If a dictionary, a subgroup will be created + for each key, and the values written there. This will be done + recursively until an array or atomic (e.g., float, int, str), is + found. Otherwise, the data is written to the given name. + path : str, optional + Write to the given path. Default (None) will write to the top + level. If the path does not exist in the file, it will be + created. + """ + if path is None: + path = '/' + try: + group = self[path] + except KeyError: + # create the group + self.create_group(path) + group = self[path] + if isinstance(data, dict): + # call myself for each key, value pair in the dictionary + for key, val in data.items(): + self.write_data(key, val, path='/'.join([path, name])) + else: + try: + group[name][()] = data + except KeyError: + # dataset doesn't exist yet + group[name] = data diff --git a/pycbc/inference/io/base_mcmc.py b/pycbc/inference/io/base_mcmc.py index 306c934bc8b..d55415eeaf7 100644 --- a/pycbc/inference/io/base_mcmc.py +++ b/pycbc/inference/io/base_mcmc.py @@ -31,8 +31,13 @@ import numpy import argparse -class MCMCMetadataIO(object): + +class CommonMCMCMetadataIO(object): """Provides functions for reading/writing MCMC metadata to file. + + The functions here are common to both standard MCMC (in which chains + are independent) and ensemble MCMC (in which chains/walkers share + information). """ def write_resume_point(self): """Keeps a list of the number of iterations that were in a file when a @@ -59,8 +64,25 @@ def niterations(self): @property def nwalkers(self): - """Returns the number of walkers used by the sampler.""" - return self[self.sampler_group].attrs['nwalkers'] + """Returns the number of walkers used by the sampler. + + Alias of ``nchains``. 
+ """ + try: + return self[self.sampler_group].attrs['nwalkers'] + except KeyError: + return self[self.sampler_group].attrs['nchains'] + + @property + def nchains(self): + """Returns the number of chains used by the sampler. + + Alias of ``nwalkers``. + """ + try: + return self[self.sampler_group].attrs['nchains'] + except KeyError: + return self[self.sampler_group].attrs['nwalkers'] def _thin_data(self, group, params, thin_interval): """Thins data on disk by the given interval. @@ -111,12 +133,6 @@ def thin(self, thin_interval): self._thin_data(self.samples_group, params, new_interval) # store the interval that samples were thinned by self.thinned_by = thin_interval - # If a default thin interval and thin start exist, reduce them by the - # thinned interval. If the thin interval is not an integer multiple - # of the original, we'll round up, to avoid getting samples from - # before the burn in / at an interval less than the ACL. - self.thin_start = int(numpy.ceil(self.thin_start/new_interval)) - self.thin_interval = int(numpy.ceil(self.thin_interval/new_interval)) @property def thinned_by(self): @@ -176,73 +192,152 @@ def iterations(self, parameter): def write_sampler_metadata(self, sampler): """Writes the sampler's metadata.""" self.attrs['sampler'] = sampler.name - self[self.sampler_group].attrs['nwalkers'] = sampler.nwalkers + try: + self[self.sampler_group].attrs['nchains'] = sampler.nchains + except ValueError: + self[self.sampler_group].attrs['nwalkers'] = sampler.nwalkers # write the model's metadata sampler.model.write_metadata(self) - def write_acls(self, acls): - """Writes the given autocorrelation lengths. + @property + def is_burned_in(self): + """Returns whether or not chains are burned in. + + Raises a ``ValueError`` if no burn in tests were done. + """ + try: + return self[self.sampler_group]['is_burned_in'][()] + except KeyError: + raise ValueError("No burn in tests were performed") + + @property + def burn_in_iteration(self): + """Returns the burn in iteration of all the chains. + + Raises a ``ValueError`` if no burn in tests were done. + """ + try: + return self[self.sampler_group]['burn_in_iteration'][()] + except KeyError: + raise ValueError("No burn in tests were performed") + + @property + def burn_in_index(self): + """Returns the burn in index. + + This is the burn in iteration divided by the file's ``thinned_by``. + Requires the class that this is used with has a ``burn_in_iteration`` + attribute. + """ + return self.burn_in_iteration // self.thinned_by + + @property + def act(self): + """The autocorrelation time (ACT). + + This is the ACL times the file's thinned by. Raises a ``ValueError`` + if the ACT has not been calculated. + """ + try: + return self[self.sampler_group]['act'][()] + except KeyError: + raise ValueError("ACT has not been calculated") - The ACL of each parameter is saved to - ``[sampler_group]/acls/{param}']``. The maximum over all the - parameters is saved to the file's 'acl' attribute. + @act.setter + def act(self, act): + """Writes the autocorrelation time(s). + + ACT(s) are written to the ``sample_group`` as a dataset with name + ``act``. Parameters ---------- - acls : dict - A dictionary of ACLs keyed by the parameter. + act : array or int + ACT(s) to write. + """ + # pylint: disable=no-member + self.write_data('act', act, path=self.sampler_group) - Returns - ------- - ACL - The maximum of the acls that was written to the file. + @property + def raw_acts(self): + """Dictionary of parameter names -> raw autocorrelation time(s). 
+ + Depending on the sampler, the autocorrelation times may be floats, + or [ntemps x] [nchains x] arrays. + + Raises a ``ValueError`` is no raw acts have been set. """ - group = self.sampler_group + '/acls/{}' - # write the individual acls - for param in acls: - try: - # we need to use the write_direct function because it's - # apparently the only way to update scalars in h5py - self[group.format(param)].write_direct( - numpy.array(acls[param])) - except KeyError: - # dataset doesn't exist yet - self[group.format(param)] = acls[param] - # write the maximum over all params - acl = numpy.array(list(acls.values())).max() - self[self.sampler_group].attrs['acl'] = acl - # set the default thin interval to be the acl (if it is finite) - if numpy.isfinite(acl): - self.thin_interval = int(numpy.ceil(acl)) + try: + group = self[self.sampler_group]['raw_acts'] + except KeyError: + raise ValueError("ACTs have not been calculated") + acts = {} + for param in group: + acts[param] = group[param][()] + return acts - def read_acls(self): - """Reads the acls of all the parameters. + @raw_acts.setter + def raw_acts(self, acts): + """Writes the raw autocorrelation times. - Returns - ------- - dict - A dictionary of the ACLs, keyed by the parameter name. + The ACT of each parameter is saved to + ``[sampler_group]/raw_acts/{param}']``. Works for all types of MCMC + samplers (independent chains, ensemble, parallel tempering). + + Parameters + ---------- + acts : dict + A dictionary of ACTs keyed by the parameter. """ - group = self[self.sampler_group]['acls'] - return {param: group[param].value for param in group.keys()} - - def write_burn_in(self, burn_in): - """Write the given burn-in data to the given filename.""" - group = self[self.sampler_group] - group.attrs['burn_in_test'] = burn_in.burn_in_test - group.attrs['is_burned_in'] = burn_in.is_burned_in - group.attrs['burn_in_iteration'] = burn_in.burn_in_iteration - # set the defaut thin_start to be the burn_in_index - self.thin_start = burn_in.burn_in_index - # write individual test data - for tst in burn_in.burn_in_data: - key = 'burn_in_tests/{}'.format(tst) - try: - attrs = group[key].attrs - except KeyError: - group.create_group(key) - attrs = group[key].attrs - self.write_kwargs_to_attrs(attrs, **burn_in.burn_in_data[tst]) + path = self.sampler_group + '/raw_acts' + for param in acts: + self.write_data(param, acts[param], path=path) + + @property + def acl(self): + """The autocorrelation length (ACL) of the samples. + + This is the autocorrelation time (ACT) divided by the file's + ``thinned_by`` attribute. Raises a ``ValueError`` if the ACT has not + been calculated. + """ + return self.act / self.thinned_by + + @acl.setter + def acl(self, acl): + """Sets the autocorrelation length (ACL) of the samples. + + This will convert the given value(s) to autocorrelation time(s) and + save to the ``act`` attribute; see that attribute for details. + """ + self.act = acl * self.thinned_by + + @property + def raw_acls(self): + """Dictionary of parameter names -> raw autocorrelation length(s). + + Depending on the sampler, the autocorrelation lengths may be floats, + or [ntemps x] [nchains x] arrays. + + The ACLs are the autocorrelation times (ACT) divided by the file's + ``thinned_by`` attribute. Raises a ``ValueError`` is no raw acts have + been set. + """ + return {p: self.raw_acts[p] / self.thinned_by for p in self.raw_acts} + + @raw_acls.setter + def raw_acls(self, acls): + """Sets the raw autocorrelation lengths. 
+ + The given ACLs are converted to autocorrelation times (ACTs) and saved + to the ``raw_acts`` attribute; see that attribute for details. + + Parameters + ---------- + acls : dict + A dictionary of ACLs keyed by the parameter. + """ + self.raw_acts = {p: acls[p] * self.thinned_by for p in acls} @staticmethod def extra_args_parser(parser=None, skip_args=None, **kwargs): @@ -281,7 +376,7 @@ def extra_args_parser(parser=None, skip_args=None, **kwargs): if 'thin-start' not in skip_args: act = parser.add_argument( "--thin-start", type=int, default=None, - help="Sample number to start collecting samples to plot. If " + help="Sample number to start collecting samples. If " "none provided, will use the input file's `thin_start` " "attribute.") actions.append(act) @@ -294,7 +389,7 @@ def extra_args_parser(parser=None, skip_args=None, **kwargs): if 'thin-end' not in skip_args: act = parser.add_argument( "--thin-end", type=int, default=None, - help="Sample number to stop collecting samples to plot. If " + help="Sample number to stop collecting samples. If " "none provided, will use the input file's `thin_end` " "attribute.") actions.append(act) @@ -306,172 +401,413 @@ def extra_args_parser(parser=None, skip_args=None, **kwargs): "load the last iteration. This overrides " "the thin-start/interval/end options.") actions.append(act) - if 'walkers' not in skip_args: + if 'walkers' not in skip_args and 'chains' not in skip_args: act = parser.add_argument( - "--walkers", type=int, nargs="+", default=None, + "--walkers", "--chains", type=int, nargs="+", default=None, help="Only retrieve samples from the listed " - "walkers. Default is to retrieve from all " - "walkers.") + "walkers/chains. Default is to retrieve from all " + "walkers/chains.") actions.append(act) return parser, actions -class SingleTempMCMCIO(object): - """Provides functions for reading/writing samples from an MCMC sampler. +class MCMCMetadataIO(object): + """Provides functions for reading/writing metadata to file for MCMCs in + which all chains are independent of each other. - These functions will work for samplers that have 1 or more walkers, with - only a single temperature. + Overrides the ``BaseInference`` file's ``thin_start`` and ``thin_interval`` + attributes. Instead of integers, these return arrays. """ + @property + def thin_start(self): + """Returns the default thin start to use for reading samples. + + If burn-in tests were done, this will return the burn-in index of every + chain that has burned in. The start index for chains that have not + burned in will be greater than the number of samples, so that those + chains return no samples. If no burn-in tests were done, returns 0 + for all chains. + """ + # pylint: disable=no-member + try: + thin_start = self.burn_in_index + # replace any that have not been burned in with the number + # of iterations; this will cause those chains to not return + # any samples + thin_start[~self.is_burned_in] = \ + int(numpy.ceil(self.niterations/self.thinned_by)) + return thin_start + except ValueError: + # no burn in, just return array of zeros + return numpy.zeros(self.nchains, dtype=int) - def write_samples(self, samples, parameters=None, last_iteration=None, - samples_group=None, thin_by=None): - """Writes samples to the given file. + @property + def thin_interval(self): + """Returns the default thin interval to use for reading samples. - Results are written to ``samples_group/{vararg}``, where ``{vararg}`` - is the name of a model params. 
The samples are written as an - ``nwalkers x niterations`` array. If samples already exist, the new - samples are appended to the current. + If a finite ACL exists in the file, will return that. Otherwise, + returns 1. + """ + try: + acl = self.acl + except ValueError: + return numpy.ones(self.nchains, dtype=int) + # replace any infs with the number of samples + acl[numpy.isinf(acl)] = self.niterations / self.thinned_by + return numpy.ceil(acl).astype(int) - If the current samples on disk have been thinned (determined by the - ``thinned_by`` attribute in the samples group), then the samples will - be thinned by the same amount before being written. The thinning is - started at the sample in ``samples`` that occured at the iteration - equal to the last iteration on disk plus the ``thinned_by`` interval. - If this iteration is larger than the iteration of the last given - sample, then none of the samples will be written. - Parameters - ----------- - samples : dict - The samples to write. Each array in the dictionary should have - shape nwalkers x niterations. - parameters : list, optional - Only write the specified parameters to the file. If None, will - write all of the keys in the ``samples`` dict. - last_iteration : int, optional - The iteration of the last sample. If the file's ``thinned_by`` - attribute is > 1, this is needed to determine where to start - thinning the samples such that the interval between the last sample - currently on disk and the first new sample is the same as all of - the other samples. - samples_group : str, optional - Which group to write the samples to. Default (None) will result - in writing to "samples". - thin_by : int, optional - Override the ``thinned_by`` attribute in the file with the given - value. **Only set this if you are using this function to write - something other than inference samples!** +class EnsembleMCMCMetadataIO(object): + """Provides functions for reading/writing metadata to file for ensemble + MCMCs. + """ + @property + def thin_start(self): + """Returns the default thin start to use for reading samples. + + If burn-in tests were done, returns the burn in index. Otherwise, + returns 0. 
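+
+        For example, if the chains were burned in at iteration 1000 and the
+        file has been thinned by 10, this returns the burn-in index
+        ``1000 // 10 = 100``.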
""" - nwalkers, nsamples = list(samples.values())[0].shape - assert all(p.shape == (nwalkers, nsamples) - for p in samples.values()), ( - "all samples must have the same shape") - if samples_group is None: - samples_group = self.samples_group - if parameters is None: - parameters = samples.keys() - # thin the samples - samples = thin_samples_for_writing(self, samples, parameters, - last_iteration, samples_group, - thin_by=thin_by) - # loop over number of dimensions - group = samples_group + '/{name}' - for param in parameters: - dataset_name = group.format(name=param) - data = samples[param] - # check that there's something to write after thinning - if data.shape[1] == 0: - # nothing to write, move along - continue - try: - fp_nsamples = self[dataset_name].shape[-1] - istart = fp_nsamples - istop = istart + data.shape[1] - if istop > fp_nsamples: - # resize the dataset - self[dataset_name].resize(istop, axis=1) - except KeyError: - # dataset doesn't exist yet - istart = 0 - istop = istart + data.shape[1] - self.create_dataset(dataset_name, (nwalkers, istop), - maxshape=(nwalkers, None), - dtype=data.dtype, - fletcher32=True) - self[dataset_name][:, istart:istop] = data - - def read_raw_samples(self, fields, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, walkers=None, flatten=True, - group=None): - """Base function for reading samples. + try: + return self.burn_in_index + except ValueError: + # no burn in, just return 0 + return 0 - Parameters - ----------- - fields : list - The list of field names to retrieve. - thin_start : int, optional - Start reading from the given iteration. Default is to start from - the first iteration. - thin_interval : int, optional - Only read every ``thin_interval`` -th sample. Default is 1. - thin_end : int, optional - Stop reading at the given iteration. Default is to end at the last - iteration. - iteration : int, optional - Only read the given iteration. If this provided, it overrides - the ``thin_(start|interval|end)`` options. - walkers : int, optional - Only read from the given walkers. Default is to read all. - flatten : bool, optional - Flatten the samples to 1D arrays before returning. Otherwise, the - returned arrays will have shape (requested walkers x - requested iteration(s)). Default is True. - group : str, optional - The name of the group to read sample datasets from. Default is - the file's ``samples_group``. + @property + def thin_interval(self): + """Returns the default thin interval to use for reading samples. - Returns - ------- - dict - A dictionary of field name -> numpy array pairs. + If a finite ACL exists in the file, will return that. Otherwise, + returns 1. """ - if isinstance(fields, string_types): - fields = [fields] - # walkers to load - if walkers is not None: - widx = numpy.zeros(self.nwalkers, dtype=bool) - widx[walkers] = True - nwalkers = widx.sum() + try: + acl = self.acl + except ValueError: + acl = 1 + if numpy.isfinite(acl): + acl = int(numpy.ceil(acl)) else: - widx = slice(0, None) - nwalkers = self.nwalkers - # get the slice to use - if iteration is not None: - get_index = int(iteration) - niterations = 1 + acl = 1 + return acl + + +def write_samples(fp, samples, parameters=None, last_iteration=None, + samples_group=None, thin_by=None): + """Writes samples to the given file. + + This works for both standard MCMC and ensemble MCMC samplers without + parallel tempering. + + Results are written to ``samples_group/{vararg}``, where ``{vararg}`` + is the name of a model params. 
The samples are written as an + ``nwalkers x niterations`` array. If samples already exist, the new + samples are appended to the current. + + If the current samples on disk have been thinned (determined by the + ``thinned_by`` attribute in the samples group), then the samples will + be thinned by the same amount before being written. The thinning is + started at the sample in ``samples`` that occured at the iteration + equal to the last iteration on disk plus the ``thinned_by`` interval. + If this iteration is larger than the iteration of the last given + sample, then none of the samples will be written. + + Parameters + ----------- + fp : BaseInferenceFile + Open file handler to write files to. Must be an instance of + BaseInferenceFile with CommonMCMCMetadataIO methods added. + samples : dict + The samples to write. Each array in the dictionary should have + shape nwalkers x niterations. + parameters : list, optional + Only write the specified parameters to the file. If None, will + write all of the keys in the ``samples`` dict. + last_iteration : int, optional + The iteration of the last sample. If the file's ``thinned_by`` + attribute is > 1, this is needed to determine where to start + thinning the samples such that the interval between the last sample + currently on disk and the first new sample is the same as all of + the other samples. + samples_group : str, optional + Which group to write the samples to. Default (None) will result + in writing to "samples". + thin_by : int, optional + Override the ``thinned_by`` attribute in the file with the given + value. **Only set this if you are using this function to write + something other than inference samples!** + """ + nwalkers, nsamples = list(samples.values())[0].shape + assert all(p.shape == (nwalkers, nsamples) + for p in samples.values()), ( + "all samples must have the same shape") + if samples_group is None: + samples_group = fp.samples_group + if parameters is None: + parameters = samples.keys() + # thin the samples + samples = thin_samples_for_writing(fp, samples, parameters, + last_iteration, samples_group, + thin_by=thin_by) + # loop over number of dimensions + group = samples_group + '/{name}' + for param in parameters: + dataset_name = group.format(name=param) + data = samples[param] + # check that there's something to write after thinning + if data.shape[1] == 0: + # nothing to write, move along + continue + try: + fp_nsamples = fp[dataset_name].shape[-1] + istart = fp_nsamples + istop = istart + data.shape[1] + if istop > fp_nsamples: + # resize the dataset + fp[dataset_name].resize(istop, axis=1) + except KeyError: + # dataset doesn't exist yet + istart = 0 + istop = istart + data.shape[1] + fp.create_dataset(dataset_name, (nwalkers, istop), + maxshape=(nwalkers, None), + dtype=data.dtype, + fletcher32=True) + fp[dataset_name][:, istart:istop] = data + + +def ensemble_read_raw_samples(fp, fields, thin_start=None, + thin_interval=None, thin_end=None, + iteration=None, walkers=None, flatten=True, + group=None): + """Base function for reading samples from ensemble MCMC files without + parallel tempering. + + Parameters + ----------- + fp : BaseInferenceFile + Open file handler to write files to. Must be an instance of + BaseInferenceFile with EnsembleMCMCMetadataIO methods added. + fields : list + The list of field names to retrieve. + thin_start : int, optional + Start reading from the given iteration. Default is to start from + the first iteration. 
+ thin_interval : int, optional + Only read every ``thin_interval`` -th sample. Default is 1. + thin_end : int, optional + Stop reading at the given iteration. Default is to end at the last + iteration. + iteration : int, optional + Only read the given iteration. If this provided, it overrides + the ``thin_(start|interval|end)`` options. + walkers : (list of) int, optional + Only read from the given walkers. Default (``None``) is to read all. + flatten : bool, optional + Flatten the samples to 1D arrays before returning. Otherwise, the + returned arrays will have shape (requested walkers x + requested iteration(s)). Default is True. + group : str, optional + The name of the group to read sample datasets from. Default is + the file's ``samples_group``. + + Returns + ------- + dict + A dictionary of field name -> numpy array pairs. + """ + if isinstance(fields, string_types): + fields = [fields] + # walkers to load + widx, nwalkers = _ensemble_get_walker_index(fp, walkers) + # get the slice to use + get_index = _ensemble_get_index(fp, thin_start, thin_interval, thin_end, + iteration) + # load + if group is None: + group = fp.samples_group + group = group + '/{name}' + arrays = {} + for name in fields: + arr = fp[group.format(name=name)][widx, get_index] + niterations = arr.shape[-1] if iteration is None else 1 + if flatten: + arr = arr.flatten() else: - get_index = self.get_slice(thin_start=thin_start, - thin_end=thin_end, - thin_interval=thin_interval) - # we'll just get the number of iterations from the returned shape - niterations = None - # load - if group is None: - group = self.samples_group - group = group + '/{name}' - arrays = {} - for name in fields: - arr = self[group.format(name=name)][widx, get_index] - if niterations is None: - niterations = arr.shape[-1] - if flatten: - arr = arr.flatten() - else: - # ensure that the returned array is 2D - arr = arr.reshape((nwalkers, niterations)) - arrays[name] = arr - return arrays + # ensure that the returned array is 2D + arr = arr.reshape((nwalkers, niterations)) + arrays[name] = arr + return arrays + + +def _ensemble_get_walker_index(fp, walkers=None): + """Convenience function to determine which walkers to load. + + Parameters + ---------- + fp : BaseInferenceFile + Open file handler to write files to. Must be an instance of + BaseInferenceFile with EnsembleMCMCMetadataIO methods added. + walkers : (list of) int, optional + Only read from the given walkers. Default (``None``) is to read all. + + Returns + ------- + widx : array or slice + The walker indices to load. + nwalkers : int + The number of walkers that will be loaded. + """ + if walkers is not None: + widx = numpy.zeros(fp.nwalkers, dtype=bool) + widx[walkers] = True + nwalkers = widx.sum() + else: + widx = slice(None, None) + nwalkers = fp.nwalkers + return widx, nwalkers + + +def _ensemble_get_index(fp, thin_start=None, thin_interval=None, thin_end=None, + iteration=None): + """Determines the sample indices to retrieve for an ensemble MCMC. + + Parameters + ----------- + fp : BaseInferenceFile + Open file handler to write files to. Must be an instance of + BaseInferenceFile with EnsembleMCMCMetadataIO methods added. + thin_start : int, optional + Start reading from the given iteration. Default is to start from + the first iteration. + thin_interval : int, optional + Only read every ``thin_interval`` -th sample. Default is 1. + thin_end : int, optional + Stop reading at the given iteration. Default is to end at the last + iteration. 
+
+    iteration : int, optional
+        Only read the given iteration. If this is provided, it overrides
+        the ``thin_(start|interval|end)`` options.
+
+    Returns
+    -------
+    slice or int
+        The indices to retrieve.
+    """
+    if iteration is not None:
+        get_index = int(iteration)
+    else:
+        if thin_start is None:
+            thin_start = fp.thin_start
+        if thin_interval is None:
+            thin_interval = fp.thin_interval
+        if thin_end is None:
+            thin_end = fp.thin_end
+        get_index = fp.get_slice(thin_start=thin_start,
+                                 thin_interval=thin_interval,
+                                 thin_end=thin_end)
+    return get_index
+
+
+def _get_index(fp, chains, thin_start=None, thin_interval=None, thin_end=None,
+               iteration=None):
+    """Determines the sample indices to retrieve for an MCMC with independent
+    chains.
+
+    Parameters
+    -----------
+    fp : BaseInferenceFile
+        Open file handler to read samples from. Must be an instance of
+        BaseInferenceFile with EnsembleMCMCMetadataIO methods added.
+    chains : array of int
+        The chains to load.
+    thin_start : array or int, optional
+        Start reading from the given sample. May either provide an array
+        indicating the start index for each chain, or an integer. If the
+        former, the array must have the same length as the number of chains
+        that will be retrieved. If the latter, the given value will be used
+        for all chains. Default (None) is to use the file's ``thin_start``
+        attribute.
+    thin_interval : array or int, optional
+        Only read every ``thin_interval``-th sample. May either provide an
+        array indicating the interval to use for each chain, or an integer. If
+        the former, the array must have the same length as the number of chains
+        that will be retrieved. If the latter, the given value will be used for
+        all chains. Default (None) is to use the file's ``thin_interval``
+        attribute.
+    thin_end : array or int, optional
+        Stop reading at the given sample index. May either provide an
+        array indicating the end index to use for each chain, or an integer. If
+        the former, the array must have the same length as the number of chains
+        that will be retrieved. If the latter, the given value will be used for
+        all chains. Default (None) is to use the file's ``thin_end``
+        attribute.
+    iteration : int, optional
+        Only read the given iteration from all chains. If provided, it
+        overrides the ``thin_(start|interval|end)`` options.
+
+    Returns
+    -------
+    get_index : list of slice or int
+        The indices to retrieve.
+    """
+    nchains = len(chains)
+    # convenience function to get the right thin start/interval/end
+    if iteration is not None:
+        get_index = [int(iteration)]*nchains
+    else:
+        # get the slice arguments
+        thin_start = _format_slice_arg(thin_start, fp.thin_start, chains)
+        thin_interval = _format_slice_arg(thin_interval, fp.thin_interval,
+                                          chains)
+        thin_end = _format_slice_arg(thin_end, fp.thin_end, chains)
+        # the slices to use for each chain
+        get_index = [fp.get_slice(thin_start=thin_start[ci],
+                                  thin_interval=thin_interval[ci],
+                                  thin_end=thin_end[ci])
+                     for ci in range(nchains)]
+    return get_index
+
+
+def _format_slice_arg(value, default, chains):
+    """Formats a start/interval/end argument for picking out chains.
+
+    Parameters
+    ----------
+    value : None, int, array or list of int
+        The thin-start/interval/end value to format. ``None`` indicates the
+        user did not specify anything, in which case ``default`` will be used.
+        If an integer, then it will be repeated to match the length of
+        ``chains``. If an array or list, it must have the same length as
+        ``chains``.
+ default : array + What to use instead if ``value`` is ``None``. + chains : array of int + The index values of chains that will be loaded. + + Returns + ------- + array + Array giving the value to use for each chain in ``chains``. The array + will have the same length as ``chains``. + """ + if value is None and default is None: + # no value provided, and default is None, just return Nones with the + # same length as chains + value = [None]*len(chains) + elif value is None: + # use the default, with the desired values extracted + value = default[chains] + elif isinstance(value, (int, numpy.int_)): + # a single integer was provided, repeat into an array + value = numpy.repeat(value, len(chains)) + elif len(value) != len(chains): + # a list of values was provided, but the length does not match the + # chains, raise an error + raise ValueError("Number of requested thin-start/interval/end values " + "({}) does not match number of requested chains ({})" + .format(len(value), len(chains))) + return value def thin_samples_for_writing(fp, samples, parameters, last_iteration, @@ -483,7 +819,7 @@ def thin_samples_for_writing(fp, samples, parameters, last_iteration, Parameters ---------- - fp : MCMCMetadataIO instance + fp : CommonMCMCMetadataIO instance The file the sampels will be written to. Needed to determine the thin interval used on disk. samples : dict @@ -534,3 +870,42 @@ def thin_samples_for_writing(fp, samples, parameters, last_iteration, else: thinned_samples = samples return thinned_samples + + +def nsamples_in_chain(start_iter, interval, niterations): + """Calculates the number of samples in an MCMC chain given a thinning + start, end, and interval. + + This function will work with either python scalars, or numpy arrays. + + Parameters + ---------- + start_iter : (array of) int + Start iteration. If negative, will count as being how many iterations + to start before the end; otherwise, counts how many iterations to + start before the beginning. If this is larger than niterations, will + just return 0. + interval : (array of) int + Thinning interval. + niterations : (array of) int + The number of iterations. + + Returns + ------- + numpy.int_ + The number of samples in a chain, >= 0. + """ + # this is written in a slightly wonky way so that it will work with either + # python scalars or numpy arrays; it is equivalent to: + # if start_iter < 0: + # count = min(abs(start_iter), niterations) + # else: + # count = max(niterations - start_iter, 0) + slt0 = start_iter < 0 + sgt0 = start_iter >= 0 + count = slt0*abs(start_iter) + sgt0*(niterations - start_iter) + # ensure count is in [0, niterations] + cgtn = count > niterations + cok = (count >= 0) & (count <= niterations) + count = cgtn*niterations + cok*count + return numpy.ceil(count / interval).astype(int) diff --git a/pycbc/inference/io/base_multitemper.py b/pycbc/inference/io/base_multitemper.py index fc908de9b68..938feaf475b 100644 --- a/pycbc/inference/io/base_multitemper.py +++ b/pycbc/inference/io/base_multitemper.py @@ -27,8 +27,10 @@ from __future__ import absolute_import import argparse from six import string_types -from .base_mcmc import (MCMCMetadataIO, thin_samples_for_writing) import numpy +from .base_mcmc import (CommonMCMCMetadataIO, thin_samples_for_writing, + _ensemble_get_index, _ensemble_get_walker_index, + _get_index) class ParseTempsArg(argparse.Action): """Argparse action that will parse temps argument. 
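
For reference, a short sketch of how the ``nsamples_in_chain`` helper added to
``base_mcmc.py`` above behaves (the values below are illustrative only)::

    from pycbc.inference.io.base_mcmc import nsamples_in_chain

    # chain burned in at iteration 100, ACL of 5, 1000 iterations:
    # ceil((1000 - 100) / 5) = 180 samples remain
    nsamples_in_chain(100, 5, 1000)

    # a negative start counts back from the end of the chain:
    # ceil(min(100, 1000) / 5) = 20 samples remain
    nsamples_in_chain(-100, 5, 1000)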
@@ -66,9 +68,9 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, temps) -class MultiTemperedMetadataIO(MCMCMetadataIO): +class CommonMultiTemperedMetadataIO(CommonMCMCMetadataIO): """Adds support for reading/writing multi-tempered metadata to - MCMCMetadatIO. + :py:class:`~pycbc.inference.io.base_mcmc.CommonMCMCMetadatIO`. """ @property def ntemps(self): @@ -78,7 +80,8 @@ def ntemps(self): def write_sampler_metadata(self, sampler): """Adds writing ntemps to file. """ - super(MultiTemperedMetadataIO, self).write_sampler_metadata(sampler) + super(CommonMultiTemperedMetadataIO, self).write_sampler_metadata( + sampler) self[self.sampler_group].attrs["ntemps"] = sampler.ntemps @staticmethod @@ -87,7 +90,7 @@ def extra_args_parser(parser=None, skip_args=None, **kwargs): """ if skip_args is None: skip_args = [] - parser, actions = MCMCMetadataIO.extra_args_parser( + parser, actions = CommonMCMCMetadataIO.extra_args_parser( parser=parser, skip_args=skip_args, **kwargs) if 'temps' not in skip_args: act = parser.add_argument( @@ -100,169 +103,305 @@ def extra_args_parser(parser=None, skip_args=None, **kwargs): return parser, actions -class MultiTemperedMCMCIO(object): - """Provides functions for reading/writing samples from a parallel-tempered - MCMC sampler. +def write_samples(fp, samples, parameters=None, last_iteration=None, + samples_group=None, thin_by=None): + """Writes samples to the given file. + + This works both for standard MCMC and ensemble MCMC samplers with + parallel tempering. + + Results are written to ``samples_group/{vararg}``, where ``{vararg}`` + is the name of a model params. The samples are written as an + ``ntemps x nwalkers x niterations`` array. + + Parameters + ----------- + fp : BaseInferenceFile + Open file handler to write files to. Must be an instance of + BaseInferenceFile with CommonMultiTemperedMetadataIO methods added. + samples : dict + The samples to write. Each array in the dictionary should have + shape ntemps x nwalkers x niterations. + parameters : list, optional + Only write the specified parameters to the file. If None, will + write all of the keys in the ``samples`` dict. + last_iteration : int, optional + The iteration of the last sample. If the file's ``thinned_by`` + attribute is > 1, this is needed to determine where to start + thinning the samples to match what has already been stored on disk. + samples_group : str, optional + Which group to write the samples to. Default (None) will result + in writing to "samples". + thin_by : int, optional + Override the ``thinned_by`` attribute in the file with the given + value. **Only set this if you are using this function to write + something other than inference samples!** """ - def write_samples(self, samples, parameters=None, last_iteration=None, - samples_group=None, thin_by=None): - """Writes samples to the given file. 
+ ntemps, nwalkers, niterations = tuple(samples.values())[0].shape + assert all(p.shape == (ntemps, nwalkers, niterations) + for p in samples.values()), ( + "all samples must have the same shape") + if samples_group is None: + samples_group = fp.samples_group + if parameters is None: + parameters = list(samples.keys()) + # thin the samples + samples = thin_samples_for_writing(fp, samples, parameters, + last_iteration, samples_group, + thin_by=thin_by) + # loop over number of dimensions + group = samples_group + '/{name}' + for param in parameters: + dataset_name = group.format(name=param) + data = samples[param] + # check that there's something to write after thinning + if data.shape[2] == 0: + # nothing to write, move along + continue + try: + fp_niterations = fp[dataset_name].shape[-1] + istart = fp_niterations + istop = istart + data.shape[2] + if istop > fp_niterations: + # resize the dataset + fp[dataset_name].resize(istop, axis=2) + except KeyError: + # dataset doesn't exist yet + istart = 0 + istop = istart + data.shape[2] + fp.create_dataset(dataset_name, (ntemps, nwalkers, istop), + maxshape=(ntemps, nwalkers, None), + dtype=data.dtype, + fletcher32=True) + fp[dataset_name][:, :, istart:istop] = data - Results are written to ``samples_group/{vararg}``, where ``{vararg}`` - is the name of a model params. The samples are written as an - ``ntemps x nwalkers x niterations`` array. - Parameters - ----------- - samples : dict - The samples to write. Each array in the dictionary should have - shape ntemps x nwalkers x niterations. - parameters : list, optional - Only write the specified parameters to the file. If None, will - write all of the keys in the ``samples`` dict. - last_iteration : int, optional - The iteration of the last sample. If the file's ``thinned_by`` - attribute is > 1, this is needed to determine where to start - thinning the samples to match what has already been stored on disk. - samples_group : str, optional - Which group to write the samples to. Default (None) will result - in writing to "samples". - thin_by : int, optional - Override the ``thinned_by`` attribute in the file with the given - value. **Only set this if you are using this function to write - something other than inference samples!** - """ - ntemps, nwalkers, niterations = tuple(samples.values())[0].shape - assert all(p.shape == (ntemps, nwalkers, niterations) - for p in samples.values()), ( - "all samples must have the same shape") - if samples_group is None: - samples_group = self.samples_group - if parameters is None: - parameters = list(samples.keys()) - # thin the samples - samples = thin_samples_for_writing(self, samples, parameters, - last_iteration, samples_group, - thin_by=thin_by) - # loop over number of dimensions - group = samples_group + '/{name}' - for param in parameters: - dataset_name = group.format(name=param) - data = samples[param] - # check that there's something to write after thinning - if data.shape[2] == 0: - # nothing to write, move along +def read_raw_samples(fp, fields, + thin_start=None, thin_interval=None, thin_end=None, + iteration=None, temps='all', chains=None, + flatten=True, group=None): + """Base function for reading samples from a collection of independent + MCMC chains file with parallel tempering. + + This may collect differing numbering of samples from each chains, + depending on the thinning settings for each chain. 
If not flattened, the
+    returned array will have dimensions requested temps x requested chains x
+    max samples, where max samples is the largest number of samples retrieved
+    from a single chain. Chains that retrieve fewer samples will be padded with
+    ``numpy.nan``. If flattened, the NaNs are removed prior to returning.
+
+    Parameters
+    -----------
+    fp : BaseInferenceFile
+        Open file handler to read samples from. Must be an instance of
+        BaseInferenceFile with CommonMultiTemperedMetadataIO methods added.
+    fields : list
+        The list of field names to retrieve.
+    thin_start : array or int, optional
+        Start reading from the given sample. May either provide an array
+        indicating the start index for each chain, or an integer. If the
+        former, the array must have the same length as the number of chains
+        that will be retrieved. If the latter, the given value will be used
+        for all chains. Default (None) is to use the file's ``thin_start``
+        attribute.
+    thin_interval : array or int, optional
+        Only read every ``thin_interval``-th sample. May either provide an
+        array indicating the interval to use for each chain, or an integer. If
+        the former, the array must have the same length as the number of chains
+        that will be retrieved. If the latter, the given value will be used for
+        all chains. Default (None) is to use the file's ``thin_interval``
+        attribute.
+    thin_end : array or int, optional
+        Stop reading at the given sample index. May either provide an
+        array indicating the end index to use for each chain, or an integer. If
+        the former, the array must have the same length as the number of chains
+        that will be retrieved. If the latter, the given value will be used for
+        all chains. Default (None) is to use the file's ``thin_end``
+        attribute.
+    iteration : int, optional
+        Only read the given iteration from all chains. If provided, it
+        overrides the ``thin_(start|interval|end)`` options.
+    temps : 'all' or (list of) int, optional
+        The temperature index (or list of indices) to retrieve. To retrieve
+        all temperatures pass 'all', or a list of all of the temperatures.
+        Default is 'all'.
+    chains : (list of) int, optional
+        Only read from the given chains. Default is to read all.
+    flatten : bool, optional
+        Remove NaNs and flatten the samples to 1D arrays before returning.
+        Otherwise, the returned arrays will have shape (requested temps x
+        requested chains x max requested iteration(s)), with chains that return
+        fewer samples padded with NaNs. Default is True.
+    group : str, optional
+        The name of the group to read sample datasets from. Default is
+        the file's ``samples_group``.
+
+    Returns
+    -------
+    dict
+        A dictionary of field name -> numpy array pairs.
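+
+    Examples
+    --------
+    A minimal sketch reading a single temperature from two chains (the file
+    and parameter names are hypothetical)::
+
+        from pycbc.inference.io import loadfile
+        from pycbc.inference.io.base_multitemper import read_raw_samples
+
+        with loadfile('epsie_checkpoint.hdf', 'r') as fp:
+            samples = read_raw_samples(fp, ['x'], chains=[0, 1], temps=0)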
+ """ + if isinstance(fields, string_types): + fields = [fields] + if group is None: + group = fp.samples_group + group = group + '/{name}' + # chains to load + if chains is None: + chains = numpy.arange(fp.nchains) + elif not isinstance(chains, (list, numpy.ndarray)): + chains = numpy.array([chains]).astype(int) + get_index = _get_index(fp, chains, thin_start, thin_interval, thin_end, + iteration) + # load the samples + arrays = {} + for name in fields: + dset = group.format(name=name) + # get the temperatures to load + tidx, selecttemps, ntemps = _get_temps_index(temps, fp, dset) + alist = [] + maxiters = 0 + for ii, cidx in enumerate(chains): + idx = get_index[ii] + # load the data + thisarr = fp[dset][tidx, cidx, idx] + if thisarr.size == 0: + # no samples were loaded; skip this chain + alist.append(None) continue - try: - fp_niterations = self[dataset_name].shape[-1] - istart = fp_niterations - istop = istart + data.shape[2] - if istop > fp_niterations: - # resize the dataset - self[dataset_name].resize(istop, axis=2) - except KeyError: - # dataset doesn't exist yet - istart = 0 - istop = istart + data.shape[2] - self.create_dataset(dataset_name, (ntemps, nwalkers, istop), - maxshape=(ntemps, nwalkers, - None), - dtype=data.dtype, - fletcher32=True) - self[dataset_name][:, :, istart:istop] = data + if isinstance(idx, (int, numpy.int_)): + # make sure the last dimension corresponds to iteration + thisarr = thisarr.reshape(list(thisarr.shape)+[1]) + # pull out the temperatures we need + if selecttemps: + thisarr = thisarr[temps, ...] + # make sure its 2D + thisarr = thisarr.reshape(ntemps, thisarr.shape[-1]) + alist.append(thisarr) + maxiters = max(maxiters, thisarr.shape[-1]) + # stack into a single array + arr = numpy.full((ntemps, len(chains), maxiters), numpy.nan) + for ii, thisarr in enumerate(alist): + if thisarr is not None: + arr[:, ii, :thisarr.shape[-1]] = thisarr + if flatten: + # flatten and remove nans + arr = arr.flatten() + arr = arr[~numpy.isnan(arr)] + arrays[name] = arr + return arrays - def read_raw_samples(self, fields, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, temps='all', walkers=None, - flatten=True, group=None): - """Base function for reading samples. - Parameters - ----------- - fields : list - The list of field names to retrieve. - thin_start : int, optional - Start reading from the given iteration. Default is to start from - the first iteration. - thin_interval : int, optional - Only read every ``thin_interval`` -th sample. Default is 1. - thin_end : int, optional - Stop reading at the given iteration. Default is to end at the last - iteration. - iteration : int, optional - Only read the given iteration. If this provided, it overrides - the ``thin_(start|interval|end)`` options. - temps : 'all' or (list of) int, optional - The temperature index (or list of indices) to retrieve. To retrieve - all temperates pass 'all', or a list of all of the temperatures. - Default is 'all'. - walkers : (list of) int, optional - Only read from the given walkers. Default is to read all. - flatten : bool, optional - Flatten the samples to 1D arrays before returning. Otherwise, the - returned arrays will have shape (requested temps x - requested walkers x requested iteration(s)). Default is True. - group : str, optional - The name of the group to read sample datasets from. Default is - the file's ``samples_group``. 
+def ensemble_read_raw_samples(fp, fields, thin_start=None, + thin_interval=None, thin_end=None, + iteration=None, temps='all', walkers=None, + flatten=True, group=None): + """Base function for reading samples from ensemble MCMC file with + parallel tempering. - Returns - ------- - array_class - An instance of the given array class populated with values - retrieved from the fields. - """ - if isinstance(fields, string_types): - fields = [fields] - # walkers to load - if walkers is not None: - widx = numpy.zeros(self.nwalkers, dtype=bool) - widx[walkers] = True - nwalkers = widx.sum() + Parameters + ----------- + fp : BaseInferenceFile + Open file handler to write files to. Must be an instance of + BaseInferenceFile with CommonMultiTemperedMetadataIO methods added. + fields : list + The list of field names to retrieve. + thin_start : int, optional + Start reading from the given iteration. Default is to start from + the first iteration. + thin_interval : int, optional + Only read every ``thin_interval`` -th sample. Default is 1. + thin_end : int, optional + Stop reading at the given iteration. Default is to end at the last + iteration. + iteration : int, optional + Only read the given iteration. If this provided, it overrides + the ``thin_(start|interval|end)`` options. + temps : 'all' or (list of) int, optional + The temperature index (or list of indices) to retrieve. To retrieve + all temperates pass 'all', or a list of all of the temperatures. + Default is 'all'. + walkers : (list of) int, optional + Only read from the given walkers. Default (``None``) is to read all. + flatten : bool, optional + Flatten the samples to 1D arrays before returning. Otherwise, the + returned arrays will have shape (requested temps x + requested walkers x requested iteration(s)). Default is True. + group : str, optional + The name of the group to read sample datasets from. Default is + the file's ``samples_group``. + + Returns + ------- + dict + A dictionary of field name -> numpy array pairs. + """ + if isinstance(fields, string_types): + fields = [fields] + # walkers to load + widx, nwalkers = _ensemble_get_walker_index(fp, walkers) + # get the slice to use + get_index = _ensemble_get_index(fp, thin_start, thin_interval, thin_end, + iteration) + # load + if group is None: + group = fp.samples_group + group = group + '/{name}' + arrays = {} + for name in fields: + dset = group.format(name=name) + tidx, selecttemps, ntemps = _get_temps_index(temps, fp, dset) + arr = fp[dset][tidx, widx, get_index] + niterations = arr.shape[-1] if iteration is None else 1 + if selecttemps: + # pull out the temperatures we need + arr = arr[temps, ...] + if flatten: + arr = arr.flatten() else: - widx = slice(None, None) - nwalkers = self.nwalkers - # temperatures to load + # ensure that the returned array is 3D + arr = arr.reshape((ntemps, nwalkers, niterations)) + arrays[name] = arr + return arrays + + +def _get_temps_index(temps, fp, dataset): + """Convenience function to determine which temperatures to load. + + Parameters + ----------- + temps : 'all' or (list of) int + The temperature index (or list of indices) to retrieve. To retrieve + all temperates pass 'all', or a list of all of the temperatures. + fp : BaseInferenceFile + Open file handler to read samples from. Must be an instance of + BaseInferenceFile with CommonMultiTemperedMetadataIO methods added. + dataset : str + The name of the dataset that samples will be loaded from. 
+ + Returns + ------- + tidx : slice or list of int + The temperature indices to load from the file. + selecttemps : bool + Whether specific temperatures need to be pulled out of the samples + array after it is loaded from the file. + ntemps : int + The number of temperatures that will be loaded. + """ + if temps == 'all': + # all temperatures were requested; just need to know how many + ntemps = fp[dataset].shape[0] + tidx = slice(None, None) selecttemps = False - if isinstance(temps, (int, numpy.int32, numpy.int64)): - tidx = temps - ntemps = 1 - else: - # temps is either 'all' or a list of temperatures; - # in either case, we'll get all of the temperatures from the file; - # if not 'all', then we'll pull out the ones we want - tidx = slice(None, None) - selecttemps = temps != 'all' - if selecttemps: - ntemps = len(temps) - else: - ntemps = self.ntemps - # get the slice to use - if iteration is not None: - get_index = int(iteration) - niterations = 1 - else: - get_index = self.get_slice(thin_start=thin_start, - thin_end=thin_end, - thin_interval=thin_interval) - # we'll just get the number of iterations from the returned shape - niterations = None - # load - if group is None: - group = self.samples_group - group = group + '/{name}' - arrays = {} - for name in fields: - arr = self[group.format(name=name)][tidx, widx, get_index] - if niterations is None: - niterations = arr.shape[-1] - # pull out the temperatures we need - if selecttemps: - arr = arr[temps, ...] - if flatten: - arr = arr.flatten() - else: - # ensure that the returned array is 3D - arr = arr.reshape((ntemps, nwalkers, niterations)) - arrays[name] = arr - return arrays + elif isinstance(temps, (int, numpy.int_)): + # only a single temperature is requested + ntemps = 1 + tidx = temps + selecttemps = False + else: + # a select set of temperatures are requested + tidx = slice(None, None) + ntemps = len(temps) + selecttemps = True + return tidx, selecttemps, ntemps diff --git a/pycbc/inference/io/emcee.py b/pycbc/inference/io/emcee.py index f1ae6cbc7e3..c793e29fc26 100644 --- a/pycbc/inference/io/emcee.py +++ b/pycbc/inference/io/emcee.py @@ -26,14 +26,53 @@ import numpy from .base_sampler import BaseSamplerFile -from .base_mcmc import (MCMCMetadataIO, SingleTempMCMCIO) +from .base_mcmc import (EnsembleMCMCMetadataIO, CommonMCMCMetadataIO, + write_samples, ensemble_read_raw_samples) -class EmceeFile(SingleTempMCMCIO, MCMCMetadataIO, BaseSamplerFile): +class EmceeFile(EnsembleMCMCMetadataIO, CommonMCMCMetadataIO, BaseSamplerFile): """Class to handle file IO for the ``emcee`` sampler.""" name = 'emcee_file' + def write_samples(self, samples, **kwargs): + r"""Writes samples to the given file. + + Calls :py:func:`base_mcmc.write_samples`. See that function for + details. + + Parameters + ---------- + samples : dict + The samples to write. Each array in the dictionary should have + shape nwalkers x niterations. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_mcmc.write_samples`. + """ + write_samples(self, samples, **kwargs) + + def read_raw_samples(self, fields, **kwargs): + r"""Base function for reading samples. + + Calls :py:func:`base_mcmc.ensemble_read_raw_samples`. See that function + for details. + + Parameters + ----------- + fields : list + The list of field names to retrieve. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_mcmc.ensemble_read_raw_samples`. + + Returns + ------- + dict + A dictionary of field name -> numpy array pairs. 
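+
+        Examples
+        --------
+        A minimal sketch (the file and parameter names are hypothetical)::
+
+            from pycbc.inference.io import loadfile
+            with loadfile('emcee_checkpoint.hdf', 'r') as fp:
+                samples = fp.read_raw_samples(['x'], flatten=False)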
+ """ + return ensemble_read_raw_samples(self, fields, **kwargs) + def read_acceptance_fraction(self, walkers=None): """Reads the acceptance fraction. diff --git a/pycbc/inference/io/emcee_pt.py b/pycbc/inference/io/emcee_pt.py index a5f209c132e..9f35b005b21 100644 --- a/pycbc/inference/io/emcee_pt.py +++ b/pycbc/inference/io/emcee_pt.py @@ -22,10 +22,13 @@ import numpy from .base_sampler import BaseSamplerFile -from .base_multitemper import (MultiTemperedMetadataIO, MultiTemperedMCMCIO) +from .base_mcmc import EnsembleMCMCMetadataIO +from .base_multitemper import (CommonMultiTemperedMetadataIO, + write_samples, + ensemble_read_raw_samples) -class EmceePTFile(MultiTemperedMCMCIO, MultiTemperedMetadataIO, +class EmceePTFile(EnsembleMCMCMetadataIO, CommonMultiTemperedMetadataIO, BaseSamplerFile): """Class to handle file IO for the ``emcee`` sampler.""" @@ -36,6 +39,44 @@ def betas(self): """The betas that were used.""" return self[self.sampler_group].attrs["betas"] + def write_samples(self, samples, **kwargs): + r"""Writes samples to the given file. + + Calls :py:func:`base_multitemper.write_samples`. See that function for + details. + + Parameters + ---------- + samples : dict + The samples to write. Each array in the dictionary should have + shape ntemps x nwalkers x niterations. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.write_samples`. + """ + write_samples(self, samples, **kwargs) + + def read_raw_samples(self, fields, **kwargs): + r"""Base function for reading samples. + + Calls :py:func:`base_multitemper.ensemble_read_raw_samples`. See that + function for details. + + Parameters + ----------- + fields : list + The list of field names to retrieve. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.ensemble_read_raw_samples`. + + Returns + ------- + dict + A dictionary of field name -> numpy array pairs. + """ + return ensemble_read_raw_samples(self, fields, **kwargs) + def write_sampler_metadata(self, sampler): """Adds writing betas to MultiTemperedMCMCIO. """ diff --git a/pycbc/inference/io/epsie.py b/pycbc/inference/io/epsie.py index 070f7b6446e..c7128b38b21 100644 --- a/pycbc/inference/io/epsie.py +++ b/pycbc/inference/io/epsie.py @@ -23,10 +23,13 @@ from epsie import load_state from .base_sampler import BaseSamplerFile -from .base_multitemper import (MultiTemperedMCMCIO, MultiTemperedMetadataIO) +from .base_mcmc import MCMCMetadataIO +from .base_multitemper import (CommonMultiTemperedMetadataIO, + write_samples, + read_raw_samples) -class EpsieFile(MultiTemperedMCMCIO, MultiTemperedMetadataIO, +class EpsieFile(MCMCMetadataIO, CommonMultiTemperedMetadataIO, BaseSamplerFile): """Class to handle IO for Epsie's parallel-tempered sampler.""" @@ -57,10 +60,7 @@ def write_sampler_metadata(self, sampler): """ super(EpsieFile, self).write_sampler_metadata(sampler) self[self.sampler_group].attrs['seed'] = sampler.seed - try: - self[self.sampler_group]["betas"][:] = sampler.betas - except KeyError: - self[self.sampler_group]["betas"] = sampler.betas + self.write_data("betas", sampler.betas, path=self.sampler_group) def thin(self, thin_interval): """Thins the samples on disk to the given thinning interval. @@ -68,10 +68,14 @@ def thin(self, thin_interval): Also thins the acceptance ratio and the temperature data, both of which are stored in the ``sampler_info`` group. 
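+
+        For example, calling ``thin(20)`` on a file that was previously
+        thinned by 10 keeps every second stored sample and sets
+        ``thinned_by`` to 20.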
""" - # thin the samples + # We'll need to know what the new interval to thin by will be + # so we can properly thin the acceptance ratio and temperatures swaps. + # We need to do this before calling the base thin, as we need to know + # what the current thinned by is. + new_interval = thin_interval // self.thinned_by + # now thin the samples super(EpsieFile, self).thin(thin_interval) # thin the acceptance ratio - new_interval = thin_interval // self.thinned_by self._thin_data(self.sampler_group, ['acceptance_ratio'], new_interval) # thin the temperature swaps; since these may not happen every @@ -79,9 +83,49 @@ def thin(self, thin_interval): ts_group = '/'.join([self.sampler_group, 'temperature_swaps']) ts_thin_interval = new_interval // self.swap_interval if ts_thin_interval > 1: - self._thin_data(ts_group, ['swap_index', 'acceptance_ratio'], + self._thin_data(ts_group, ['swap_index'], + ts_thin_interval) + self._thin_data(ts_group, ['acceptance_ratio'], ts_thin_interval) + def write_samples(self, samples, **kwargs): + r"""Writes samples to the given file. + + Calls :py:func:`base_multitemper.write_samples`. See that function for + details. + + Parameters + ---------- + samples : dict + The samples to write. Each array in the dictionary should have + shape ntemps x nwalkers x niterations. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.write_samples`. + """ + write_samples(self, samples, **kwargs) + + def read_raw_samples(self, fields, **kwargs): + r"""Base function for reading samples. + + Calls :py:func:`base_multitemper.read_raw_samples`. See that + function for details. + + Parameters + ----------- + fields : list + The list of field names to retrieve. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.read_raw_samples`. + + Returns + ------- + dict + A dictionary of field name -> numpy array pairs. + """ + return read_raw_samples(self, fields, **kwargs) + def write_acceptance_ratio(self, acceptance_ratio, last_iteration=None): """Writes the acceptance ratios to the sampler info group. 
@@ -213,3 +257,16 @@ def validate(self): # corrupted for some reason valid = False return valid + + @staticmethod + def _get_optional_args(args, opts, err_on_missing=False, **kwargs): + # need this to make sure options called "walkers" are renamed to + # "chains" + parsed = BaseSamplerFile._get_optional_args( + args, opts, err_on_missing=err_on_missing, **kwargs) + try: + chains = parsed.pop('walkers') + parsed['chains'] = chains + except KeyError: + pass + return parsed diff --git a/pycbc/inference/sampler/base_mcmc.py b/pycbc/inference/sampler/base_mcmc.py index 2d74b05791a..c931beffcb7 100644 --- a/pycbc/inference/sampler/base_mcmc.py +++ b/pycbc/inference/sampler/base_mcmc.py @@ -36,7 +36,8 @@ from pycbc.workflow import ConfigParser from pycbc.filter import autocorrelation -from pycbc.inference.io import validate_checkpoint_files +from pycbc.inference.io import (validate_checkpoint_files, loadfile) +from pycbc.inference.io.base_mcmc import nsamples_in_chain from .base import setup_output from .base import initial_dist_from_config @@ -196,7 +197,7 @@ class BaseMCMC(object): ---------- p0 pos - nwalkers + nchains niterations checkpoint_interval checkpoint_signal @@ -207,14 +208,16 @@ class BaseMCMC(object): thin_safety_factor burn_in effective_nsamples - acls - acts + acl + raw_acls + act + raw_acts """ _lastclear = None # the iteration when samples were cleared from memory _itercounter = None # the number of iterations since the last clear _pos = None _p0 = None - _nwalkers = None + _nchains = None _burn_in = None _acls = None _checkpoint_interval = None @@ -229,18 +232,24 @@ def base_shape(self): """What shape the sampler's samples arrays are in, excluding the iterations dimension. - For example, if a sampler uses 20 walkers and 3 temperatures, this + For example, if a sampler uses 20 chains and 3 temperatures, this would be ``(3, 20)``. If a sampler only uses a single walker and no temperatures this would be ``()``. """ pass @property - def nwalkers(self): - """The number of walkers used.""" - if self._nwalkers is None: - raise ValueError("number of walkers not set") - return self._nwalkers + def nchains(self): + """The number of chains used.""" + if self._nchains is None: + raise ValueError("number of chains not set") + return self._nchains + + @nchains.setter + def nchains(self, value): + """Sets the number of chains.""" + # we'll actually store it to the nchains attribute + self._nchains = int(value) @property def niterations(self): @@ -311,7 +320,7 @@ def max_samples_per_chain(self, n): # effective samples if self.target_eff_nsamples is not None: target_samps_per_chain = int(numpy.ceil( - self.target_eff_nsamples / self.nwalkers)) + self.target_eff_nsamples / self.nchains)) if n <= target_samps_per_chain: raise ValueError("max samples per chain must be > target " "effective number of samples per walker " @@ -380,7 +389,7 @@ def pos(self): @property def p0(self): - """A dictionary of the initial position of the walkers. + """A dictionary of the initial position of the chains. This is set by using ``set_p0``. If not set yet, a ``ValueError`` is raised when the attribute is accessed. @@ -393,7 +402,7 @@ def p0(self): return p0 def set_p0(self, samples_file=None, prior=None): - """Sets the initial position of the walkers. + """Sets the initial position of the chains. 
Parameters ---------- @@ -478,9 +487,9 @@ def run(self): nsamples = fp.effective_nsamples elif self.target_niterations is not None: # the number of samples is the number of iterations times the - # number of walkers - target_nsamples = self.nwalkers * self.target_niterations - nsamples = self._lastclear * self.nwalkers + # number of chains + target_nsamples = self.nchains * self.target_niterations + nsamples = self._lastclear * self.nchains else: raise ValueError("must set either target_eff_nsamples or " "target_niterations; see set_target") @@ -511,7 +520,7 @@ def run(self): logging.info("Have {} effective samples post burn in".format( nsamples)) else: - nsamples += iterinterval * self.nwalkers + nsamples += iterinterval * self.nchains @property def burn_in(self): @@ -522,24 +531,12 @@ def set_burn_in(self, burn_in): """Sets the object to use for doing burn-in tests.""" self._burn_in = burn_in - @property + @abstractmethod def effective_nsamples(self): """The effective number of samples post burn-in that the sampler has - acquired so far.""" - try: - act = numpy.array(list(self.acts.values())).max() - except (AttributeError, TypeError): - act = numpy.inf - if self.burn_in is None: - nperwalker = max(int(self.niterations // act), 1) - elif self.burn_in.is_burned_in: - nperwalker = int( - (self.niterations - self.burn_in.burn_in_iteration) // act) - # after burn in, we always have atleast 1 sample per walker - nperwalker = max(nperwalker, 1) - else: - nperwalker = 0 - return self.nwalkers * nperwalker + acquired so far. + """ + pass @abstractmethod def run_mcmc(self, niterations): @@ -589,32 +586,25 @@ def checkpoint(self): logging.info("No samples written due to thinning") else: # check for burn in, compute the acls - self.acls = None + self.raw_acls = None if self.burn_in is not None: logging.info("Updating burn in") self.burn_in.evaluate(self.checkpoint_file) - burn_in_index = self.burn_in.burn_in_index - logging.info("Is burned in: %r", self.burn_in.is_burned_in) - if self.burn_in.is_burned_in: - logging.info("Burn-in iteration: %i", - int(self.burn_in.burn_in_iteration)) - else: - burn_in_index = 0 + # write + for fn in [self.checkpoint_file, self.backup_file]: + with self.io(fn, "a") as fp: + self.burn_in.write(fp) # Compute acls; the burn_in test may have calculated an acl and # saved it, in which case we don't need to do it again. - if self.acls is None: - logging.info("Computing acls") - self.acls = self.compute_acl(self.checkpoint_file, - start_index=burn_in_index) - logging.info("ACT: %s", - str(numpy.array(list(self.acts.values())).max())) - # write + if self.raw_acls is None: + logging.info("Computing autocorrelation time") + self.raw_acls = self.compute_acl(self.checkpoint_file) + # write acts, effective number of samples for fn in [self.checkpoint_file, self.backup_file]: with self.io(fn, "a") as fp: - if self.burn_in is not None: - fp.write_burn_in(self.burn_in) - if self.acls is not None: - fp.write_acls(self.acls) + if self.raw_acls is not None: + fp.raw_acls = self.raw_acls + fp.acl = self.acl # write effective number of samples fp.write_effective_nsamples(self.effective_nsamples) # check validity @@ -737,27 +727,57 @@ def set_thin_interval_from_config(self, cp, section): self.max_samples_per_chain = max_samps_per_chain @property - def acls(self): - """The autocorrelation lengths of each parameter's thinned chain.""" + def raw_acls(self): + """Dictionary of parameter names -> autocorrelation lengths. 
+ + Depending on the sampler, the ACLs may be an integer, or an arrray of + values per chain and/or per temperature. + + Returns ``None`` if no ACLs have been calculated. + """ return self._acls - @acls.setter - def acls(self, acls): - """Sets the acls.""" + @raw_acls.setter + def raw_acls(self, acls): + """Sets the raw acls.""" self._acls = acls + @abstractmethod + def acl(self): + """The autocorrelation length. + + This method should convert the raw ACLs into an integer or array that + can be used to extract independent samples from a chain. + """ + pass + @property - def acts(self): - """The autocorrelation times of each parameter. + def raw_acts(self): + """Dictionary of parameter names -> autocorrelation time(s). - The autocorrelation time is defined as the ACL times the - ``thin_interval``. It gives the number of iterations between - independent samples. + Returns ``None`` if no ACLs have been calculated. """ - if self.acls is None: + acls = self.raw_acls + if acls is None: return None return {p: acl * self.thin_interval - for (p, acl) in self.acls.items()} + for (p, acl) in acls.items()} + + @property + def act(self): + """The autocorrelation time(s). + + The autocorrelation time is defined as the autocorrelation length times + the ``thin_interval``. It gives the number of iterations between + independent samples. Depending on the sampler, this may either be + a single integer or an array of values. + + Returns ``None`` if no ACLs have been calculated. + """ + acl = self.acl + if acl is None: + return None + return acl * self.thin_interval @abstractmethod def compute_acf(cls, filename, **kwargs): @@ -772,120 +792,174 @@ def compute_acl(cls, filename, **kwargs): pass -class MCMCAutocorrSupport(object): - """Provides class methods for calculating ensemble ACFs/ACLs. - """ +class EnsembleSupport(object): + """Adds support for ensemble MCMC samplers.""" - @classmethod - def compute_acf(cls, filename, start_index=None, end_index=None, - per_walker=False, walkers=None, parameters=None): - """Computes the autocorrleation function of the model params in the - given file. + @property + def nwalkers(self): + """The number of walkers used. - By default, parameter values are averaged over all walkers at each - iteration. The ACF is then calculated over the averaged chain. An - ACF per-walker will be returned instead if ``per_walker=True``. + Alias of ``nchains``. + """ + return self.nchains - Parameters - ----------- - filename : str - Name of a samples file to compute ACFs for. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - per_walker : optional, bool - Return the ACF for each walker separately. Default is False. - walkers : optional, int or array - Calculate the ACF using only the given walkers. If None (the - default) all walkers will be used. - parameters : optional, str or array - Calculate the ACF for only the given parameters. If None (the - default) will calculate the ACF for all of the model params. + @nwalkers.setter + def nwalkers(self, value): + """Sets the number of walkers.""" + # we'll actually store it to the nchains attribute + self.nchains = value - Returns - ------- - dict : - Dictionary of arrays giving the ACFs for each parameter. 
If - ``per-walker`` is True, the arrays will have shape - ``nwalkers x niterations``. - """ - acfs = {} - with cls._io(filename, 'r') as fp: - if parameters is None: - parameters = fp.variable_params - if isinstance(parameters, string_types): - parameters = [parameters] - for param in parameters: - if per_walker: - # just call myself with a single walker - if walkers is None: - walkers = numpy.arange(fp.nwalkers) - arrays = [ - cls.compute_acf(filename, start_index=start_index, - end_index=end_index, - per_walker=False, walkers=ii, - parameters=param)[param] - for ii in walkers] - acfs[param] = numpy.vstack(arrays) - else: - samples = fp.read_raw_samples( - param, thin_start=start_index, thin_interval=1, - thin_end=end_index, walkers=walkers, - flatten=False)[param] - samples = samples.mean(axis=0) - acfs[param] = autocorrelation.calculate_acf( - samples).numpy() - return acfs - - @classmethod - def compute_acl(cls, filename, start_index=None, end_index=None, - min_nsamples=10): - """Computes the autocorrleation length for all model params in the - given file. - - Parameter values are averaged over all walkers at each iteration. - The ACL is then calculated over the averaged chain. If an ACL cannot - be calculated because there are not enough samples, it will be set - to ``inf``. + @property + def acl(self): + """The autocorrelation length of the ensemble. - Parameters - ----------- - filename : str - Name of a samples file to compute ACLs for. - start_index : int, optional - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : int, optional - The end index to compute the acl to. If None, will go to the end - of the current iteration. - min_nsamples : int, optional - Require a minimum number of samples to compute an ACL. If the - number of samples per walker is less than this, will just set to - ``inf``. Default is 10. + This is calculated by taking the maximum over all of the ``raw_acls``. + This works for both single and parallel-tempered ensemble samplers. - Returns - ------- - dict - A dictionary giving the ACL for each parameter. + Returns ``None`` if no ACLs have been set. + """ + acls = self.raw_acls + if acls is None: + return None + return numpy.array(list(acls.values())).max() + + @property + def effective_nsamples(self): + """The effective number of samples post burn-in that the sampler has + acquired so far. """ - acls = {} - with cls._io(filename, 'r') as fp: - for param in fp.variable_params: + act = self.act + if act is None: + act = numpy.inf + if self.burn_in is None or not self.burn_in.is_burned_in: + start_iter = 0 + else: + start_iter = self.burn_in.burn_in_iteration + nperwalker = nsamples_in_chain(start_iter, act, self.niterations) + if self.burn_in is not None and self.burn_in.is_burned_in: + # after burn in, we always have atleast 1 sample per walker + nperwalker = max(nperwalker, 1) + return int(self.nwalkers * nperwalker) + + +# +# ============================================================================= +# +# Functions for computing autocorrelation lengths +# +# ============================================================================= +# + + +def ensemble_compute_acf(filename, start_index=None, end_index=None, + per_walker=False, walkers=None, parameters=None): + """Computes the autocorrleation function for an ensemble MCMC. + + By default, parameter values are averaged over all walkers at each + iteration. 
The ACF is then calculated over the averaged chain. An + ACF per-walker will be returned instead if ``per_walker=True``. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACFs for. + start_index : int, optional + The start index to compute the acl from. If None (the default), will + try to use the number of burn-in iterations in the file; otherwise, + will start at the first sample. + end_index : int, optional + The end index to compute the acl to. If None (the default), will go to + the end of the current iteration. + per_walker : bool, optional + Return the ACF for each walker separately. Default is False. + walkers : int or array, optional + Calculate the ACF using only the given walkers. If None (the + default) all walkers will be used. + parameters : str or array, optional + Calculate the ACF for only the given parameters. If None (the + default) will calculate the ACF for all of the model params. + + Returns + ------- + dict : + Dictionary of arrays giving the ACFs for each parameter. If + ``per-walker`` is True, the arrays will have shape + ``nwalkers x niterations``. + """ + acfs = {} + with loadfile(filename, 'r') as fp: + if parameters is None: + parameters = fp.variable_params + if isinstance(parameters, string_types): + parameters = [parameters] + for param in parameters: + if per_walker: + # just call myself with a single walker + if walkers is None: + walkers = numpy.arange(fp.nwalkers) + arrays = [ + ensemble_compute_acf(filename, start_index=start_index, + end_index=end_index, + per_walker=False, walkers=ii, + parameters=param)[param] + for ii in walkers] + acfs[param] = numpy.vstack(arrays) + else: samples = fp.read_raw_samples( param, thin_start=start_index, thin_interval=1, - thin_end=end_index, flatten=False)[param] + thin_end=end_index, walkers=walkers, + flatten=False)[param] samples = samples.mean(axis=0) - # if < min number of samples, just set to inf - if samples.size < min_nsamples: - acl = numpy.inf - else: - acl = autocorrelation.calculate_acl(samples) - if acl <= 0: - acl = numpy.inf - acls[param] = acl - return acls + acfs[param] = autocorrelation.calculate_acf( + samples).numpy() + return acfs + + +def ensemble_compute_acl(filename, start_index=None, end_index=None, + min_nsamples=10): + """Computes the autocorrleation length for an ensemble MCMC. + + Parameter values are averaged over all walkers at each iteration. + The ACL is then calculated over the averaged chain. If an ACL cannot + be calculated because there are not enough samples, it will be set + to ``inf``. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. + start_index : int, optional + The start index to compute the acl from. If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : int, optional + The end index to compute the acl to. If None, will go to the end + of the current iteration. + min_nsamples : int, optional + Require a minimum number of samples to compute an ACL. If the + number of samples per walker is less than this, will just set to + ``inf``. Default is 10. + + Returns + ------- + dict + A dictionary giving the ACL for each parameter. 
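To make the refactor above concrete: the burn-in/ACL bookkeeping now lives in these module-level helpers rather than in a mixin class method, and the effective-sample count is derived from the ACT (the ACL multiplied by the file's thin interval). A minimal usage sketch follows; the checkpoint file name 'samples.hdf' and the parameter name 'mchirp' are placeholders, not part of the patch:

    import numpy
    from pycbc.inference.io import loadfile
    from pycbc.inference.sampler.base_mcmc import (ensemble_compute_acf,
                                                   ensemble_compute_acl)

    # per-parameter ACLs, computed on the walker-averaged chains
    acls = ensemble_compute_acl('samples.hdf', min_nsamples=10)

    # the ensemble ACL is the maximum over parameters
    acl = numpy.array(list(acls.values())).max()

    # converting to an ACT uses the thin interval stored in the file
    with loadfile('samples.hdf', 'r') as fp:
        act = acl * fp.thin_interval

    # per-walker ACF of a single (placeholder) parameter
    acfs = ensemble_compute_acf('samples.hdf', parameters='mchirp',
                                per_walker=True)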
+ """ + acls = {} + with loadfile(filename, 'r') as fp: + for param in fp.variable_params: + samples = fp.read_raw_samples( + param, thin_start=start_index, thin_interval=1, + thin_end=end_index, flatten=False)[param] + samples = samples.mean(axis=0) + # if < min number of samples, just set to inf + if samples.size < min_nsamples: + acl = numpy.inf + else: + acl = autocorrelation.calculate_acl(samples) + if acl <= 0: + acl = numpy.inf + acls[param] = acl + maxacl = numpy.array(list(acls.values())).max() + logging.info("ACT: %s", str(maxacl*fp.thin_interval)) + return acls diff --git a/pycbc/inference/sampler/base_multitemper.py b/pycbc/inference/sampler/base_multitemper.py index 50be9b340f2..7aaec340e42 100644 --- a/pycbc/inference/sampler/base_multitemper.py +++ b/pycbc/inference/sampler/base_multitemper.py @@ -26,11 +26,12 @@ from __future__ import absolute_import +import logging from six import string_types - import numpy import h5py from pycbc.filter import autocorrelation +from pycbc.inference.io import loadfile class MultiTemperedSupport(object): @@ -96,152 +97,315 @@ def betas_from_config(cp, section): return ntemps, betas -class MultiTemperedAutocorrSupport(object): - """Provides class methods for calculating multi-tempered ACFs/ACLs. +# +# ============================================================================= +# +# Functions for computing autocorrelation lengths +# +# ============================================================================= +# + + +def compute_acf(filename, start_index=None, end_index=None, + chains=None, parameters=None, temps=None): + """Computes the autocorrleation function for independent MCMC chains with + parallel tempering. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACFs for. + start_index : int, optional + The start index to compute the acl from. If None (the default), + will try to use the burn in iteration for each chain; + otherwise, will start at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + chains : optional, int or array + Calculate the ACF for only the given chains. If None (the + default) ACFs for all chains will be estimated. + parameters : optional, str or array + Calculate the ACF for only the given parameters. If None (the + default) will calculate the ACF for all of the model params. + temps : optional, (list of) int or 'all' + The temperature index (or list of indices) to retrieve. If None + (the default), the ACF will only be computed for the coldest (= 0) + temperature chain. To compute an ACF for all temperates pass 'all', + or a list of all of the temperatures. + + Returns + ------- + dict : + Dictionary parameter name -> ACF arrays. The arrays have shape + ``ntemps x nchains x niterations``. 
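A short usage sketch of this per-chain ACF helper, illustrating the ``ntemps x nchains x niterations`` shape promised in the Returns section; the file name 'samples.hdf' and parameter name 'mchirp' are placeholders:

    from pycbc.inference.sampler.base_multitemper import compute_acf

    # ACFs for every temperature and every chain
    acfs = compute_acf('samples.hdf', temps='all')

    # e.g. the ACF of chain 3 at the coldest (index 0) temperature
    acf_cold_chain3 = acfs['mchirp'][0, 3, :]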
""" + acfs = {} + with loadfile(filename, 'r') as fp: + if parameters is None: + parameters = fp.variable_params + if isinstance(parameters, string_types): + parameters = [parameters] + temps = _get_temps_idx(fp, temps) + if chains is None: + chains = numpy.arange(fp.nchains) + for param in parameters: + subacfs = [] + for tk in temps: + subsubacfs = [] + for ci in chains: + samples = fp.read_raw_samples( + param, thin_start=start_index, thin_interval=1, + thin_end=end_index, chains=ci, temps=tk)[param] + thisacf = autocorrelation.calculate_acf(samples).numpy() + subsubacfs.append(thisacf) + # stack the chains + subacfs.append(subsubacfs) + # stack the temperatures + acfs[param] = numpy.stack(subacfs) + return acfs - @classmethod - def compute_acf(cls, filename, start_index=None, end_index=None, - per_walker=False, walkers=None, parameters=None, - temps=None): - """Computes the autocorrleation function of the model params in the - given file. - By default, parameter values are averaged over all walkers at each - iteration. The ACF is then calculated over the averaged chain for each - temperature. An ACF per-walker will be returned instead if - ``per_walker=True``. +def compute_acl(filename, start_index=None, end_index=None, + min_nsamples=10): + """Computes the autocorrleation length for independent MCMC chains with + parallel tempering. - Parameters - ----------- - filename : str - Name of a samples file to compute ACFs for. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - per_walker : optional, bool - Return the ACF for each walker separately. Default is False. - walkers : optional, int or array - Calculate the ACF using only the given walkers. If None (the - default) all walkers will be used. - parameters : optional, str or array - Calculate the ACF for only the given parameters. If None (the - default) will calculate the ACF for all of the model params. - temps : optional, (list of) int or 'all' - The temperature index (or list of indices) to retrieve. If None - (the default), the ACF will only be computed for the coldest (= 0) - temperature chain. To compute an ACF for all temperates pass 'all', - or a list of all of the temperatures. + ACLs are calculated separately for each chain. - Returns - ------- - dict : - Dictionary of arrays giving the ACFs for each parameter. If - ``per-walker`` is True, the arrays will have shape - ``ntemps x nwalkers x niterations``. Otherwise, the returned array - will have shape ``ntemps x niterations``. - """ - acfs = {} - with cls._io(filename, 'r') as fp: - if parameters is None: - parameters = fp.variable_params - if isinstance(parameters, string_types): - parameters = [parameters] - if isinstance(temps, int): - temps = [temps] - elif temps == 'all': - temps = numpy.arange(fp.ntemps) - elif temps is None: - temps = [0] - for param in parameters: - subacfs = [] - for tk in temps: - if per_walker: - # just call myself with a single walker - if walkers is None: - walkers = numpy.arange(fp.nwalkers) - arrays = [cls.compute_acfs(filename, + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. + start_index : {None, int} + The start index to compute the acl from. 
If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + min_nsamples : int, optional + Require a minimum number of samples to compute an ACL. If the + number of samples per walker is less than this, will just set to + ``inf``. Default is 10. + + Returns + ------- + dict + A dictionary of ntemps x nchains arrays of the ACLs of each + parameter. + """ + # following is a convenience function to calculate the acl for each chain + # defined here so that we can use map for this below + def _getacl(si): + # si: the samples loaded for a specific chain; may have nans in it + si = si[~numpy.isnan(si)] + if len(si) < min_nsamples: + acl = numpy.inf + else: + acl = autocorrelation.calculate_acl(si) + if acl <= 0: + acl = numpy.inf + return acl + acls = {} + with loadfile(filename, 'r') as fp: + tidx = numpy.arange(fp.ntemps) + for param in fp.variable_params: + these_acls = numpy.zeros((fp.ntemps, fp.nchains)) + for tk in tidx: + samples = fp.read_raw_samples( + param, thin_start=start_index, thin_interval=1, + thin_end=end_index, temps=tk, flatten=False)[param] + # flatten out the temperature + samples = samples[0, ...] + # samples now has shape nchains x maxiters + if samples.shape[-1] < min_nsamples: + these_acls[tk, :] = numpy.inf + else: + these_acls[tk, :] = list(map(_getacl, samples)) + acls[param] = these_acls + # report the mean ACL: take the max over the temps and parameters + act = acl_from_raw_acls(acls)*fp.thinned_by + finite = act[numpy.isfinite(act)] + logging.info("ACTs: min %s, mean (of finite) %s, max %s", + str(act.min()), + str(finite.mean() if finite.size > 0 else numpy.inf), + str(act.max())) + return acls + + +def acl_from_raw_acls(acls): + """Calculates the ACL for one or more chains from a dictionary of ACLs. + + This is for parallel tempered MCMCs in which the chains are independent + of each other. + + The ACL for each chain is maximized over the temperatures and parameters. + + Parameters + ---------- + acls : dict + Dictionary of parameter names -> ntemps x nchains arrays of ACLs (the + thing returned by :py:func:`compute_acl`). + + Returns + ------- + array + The ACL of each chain. + """ + return numpy.array(list(acls.values())).max(axis=0).max(axis=0) + + +def ensemble_compute_acf(filename, start_index=None, end_index=None, + per_walker=False, walkers=None, parameters=None, + temps=None): + """Computes the autocorrleation function for a parallel tempered, ensemble + MCMC. + + By default, parameter values are averaged over all walkers at each + iteration. The ACF is then calculated over the averaged chain for each + temperature. An ACF per-walker will be returned instead if + ``per_walker=True``. + + Parameters + ---------- + filename : str + Name of a samples file to compute ACFs for. + start_index : int, optional + The start index to compute the acl from. If None (the default), will + try to use the number of burn-in iterations in the file; otherwise, + will start at the first sample. + end_index : int, optional + The end index to compute the acl to. If None (the default), will go to + the end of the current iteration. + per_walker : bool, optional + Return the ACF for each walker separately. Default is False. + walkers : int or array, optional + Calculate the ACF using only the given walkers. If None (the + default) all walkers will be used. 
+ parameters : str or array, optional + Calculate the ACF for only the given parameters. If None (the + default) will calculate the ACF for all of the model params. + temps : (list of) int or 'all', optional + The temperature index (or list of indices) to retrieve. If None + (the default), the ACF will only be computed for the coldest (= 0) + temperature chain. To compute an ACF for all temperates pass 'all', + or a list of all of the temperatures. + + Returns + ------- + dict : + Dictionary of arrays giving the ACFs for each parameter. If + ``per-walker`` is True, the arrays will have shape + ``ntemps x nwalkers x niterations``. Otherwise, the returned array + will have shape ``ntemps x niterations``. + """ + acfs = {} + with loadfile(filename, 'r') as fp: + if parameters is None: + parameters = fp.variable_params + if isinstance(parameters, string_types): + parameters = [parameters] + temps = _get_temps_idx(fp, temps) + for param in parameters: + subacfs = [] + for tk in temps: + if per_walker: + # just call myself with a single walker + if walkers is None: + walkers = numpy.arange(fp.nwalkers) + arrays = [ensemble_compute_acf(filename, start_index=start_index, end_index=end_index, per_walker=False, walkers=ii, parameters=param, temps=tk)[param][0, :] - for ii in walkers] - # we'll stack all of the walker arrays to make a single - # nwalkers x niterations array; when these are stacked - # below, we'll get a ntemps x nwalkers x niterations - # array - subacfs.append(numpy.vstack(arrays)) - else: - samples = fp.read_raw_samples( - param, thin_start=start_index, - thin_interval=1, thin_end=end_index, - walkers=walkers, temps=tk, flatten=False)[param] - # contract the walker dimension using the mean, and - # flatten the (length 1) temp dimension - samples = samples.mean(axis=1)[0, :] - thisacf = autocorrelation.calculate_acf( - samples).numpy() - subacfs.append(thisacf) - # stack the temperatures - acfs[param] = numpy.stack(subacfs) - return acfs - - @classmethod - def compute_acl(cls, filename, start_index=None, end_index=None, - min_nsamples=10): - """Computes the autocorrleation length for all model params and - temperatures in the given file. - - Parameter values are averaged over all walkers at each iteration and - temperature. The ACL is then calculated over the averaged chain. - - Parameters - ----------- - filename : str - Name of a samples file to compute ACLs for. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - min_nsamples : int, optional - Require a minimum number of samples to compute an ACL. If the - number of samples per walker is less than this, will just set to - ``inf``. Default is 10. - - Returns - ------- - dict - A dictionary of ntemps-long arrays of the ACLs of each parameter. 
- """ - acls = {} - with cls._io(filename, 'r') as fp: - if end_index is None: - end_index = fp.niterations - tidx = numpy.arange(fp.ntemps) - for param in fp.variable_params: - these_acls = numpy.zeros(fp.ntemps) - for tk in tidx: + for ii in walkers] + # we'll stack all of the walker arrays to make a single + # nwalkers x niterations array; when these are stacked + # below, we'll get a ntemps x nwalkers x niterations + # array + subacfs.append(numpy.vstack(arrays)) + else: samples = fp.read_raw_samples( - param, thin_start=start_index, thin_interval=1, - thin_end=end_index, temps=tk, flatten=False)[param] - # contract the walker dimension using the mean, and flatten - # the (length 1) temp dimension + param, thin_start=start_index, + thin_interval=1, thin_end=end_index, + walkers=walkers, temps=tk, flatten=False)[param] + # contract the walker dimension using the mean, and + # flatten the (length 1) temp dimension samples = samples.mean(axis=1)[0, :] - if samples.size < min_nsamples: - acl = numpy.inf - else: - acl = autocorrelation.calculate_acl(samples) - if acl <= 0: - acl = numpy.inf - these_acls[tk] = acl - acls[param] = these_acls - return acls + thisacf = autocorrelation.calculate_acf( + samples).numpy() + subacfs.append(thisacf) + # stack the temperatures + acfs[param] = numpy.stack(subacfs) + return acfs + + +def ensemble_compute_acl(filename, start_index=None, end_index=None, + min_nsamples=10): + """Computes the autocorrleation length for a parallel tempered, ensemble + MCMC. + + Parameter values are averaged over all walkers at each iteration and + temperature. The ACL is then calculated over the averaged chain. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. + start_index : int, optional + The start index to compute the acl from. If None (the default), will + try to use the number of burn-in iterations in the file; otherwise, + will start at the first sample. + end_index : int, optional + The end index to compute the acl to. If None, will go to the end + of the current iteration. + min_nsamples : int, optional + Require a minimum number of samples to compute an ACL. If the + number of samples per walker is less than this, will just set to + ``inf``. Default is 10. + + Returns + ------- + dict + A dictionary of ntemps-long arrays of the ACLs of each parameter. + """ + acls = {} + with loadfile(filename, 'r') as fp: + if end_index is None: + end_index = fp.niterations + tidx = numpy.arange(fp.ntemps) + for param in fp.variable_params: + these_acls = numpy.zeros(fp.ntemps) + for tk in tidx: + samples = fp.read_raw_samples( + param, thin_start=start_index, thin_interval=1, + thin_end=end_index, temps=tk, flatten=False)[param] + # contract the walker dimension using the mean, and flatten + # the (length 1) temp dimension + samples = samples.mean(axis=1)[0, :] + if samples.size < min_nsamples: + acl = numpy.inf + else: + acl = autocorrelation.calculate_acl(samples) + if acl <= 0: + acl = numpy.inf + these_acls[tk] = acl + acls[param] = these_acls + maxacl = numpy.array(list(acls.values())).max() + logging.info("ACT: %s", str(maxacl*fp.thin_interval)) + return acls + + +def _get_temps_idx(fp, temps): + """Gets the indices of temperatures to load for computing ACF. 
+ """ + if isinstance(temps, int): + temps = [temps] + elif temps == 'all': + temps = numpy.arange(fp.ntemps) + elif temps is None: + temps = [0] + return temps diff --git a/pycbc/inference/sampler/emcee.py b/pycbc/inference/sampler/emcee.py index fad7ba830ee..cf8c4875fac 100644 --- a/pycbc/inference/sampler/emcee.py +++ b/pycbc/inference/sampler/emcee.py @@ -33,9 +33,11 @@ from pycbc.pool import choose_pool from .base import (BaseSampler, setup_output) -from .base_mcmc import (BaseMCMC, MCMCAutocorrSupport, raw_samples_to_dict, +from .base_mcmc import (BaseMCMC, EnsembleSupport, + ensemble_compute_acf, ensemble_compute_acl, + raw_samples_to_dict, blob_data_to_dict, get_optional_arg_from_config) -from ..burn_in import MCMCBurnInTests +from ..burn_in import EnsembleMCMCBurnInTests from pycbc.inference.io import EmceeFile from .. import models @@ -48,7 +50,7 @@ # ============================================================================= # -class EmceeEnsembleSampler(MCMCAutocorrSupport, BaseMCMC, BaseSampler): +class EmceeEnsembleSampler(EnsembleSupport, BaseMCMC, BaseSampler): """This class is used to construct an MCMC sampler from the emcee package's EnsembleSampler. @@ -65,7 +67,7 @@ class EmceeEnsembleSampler(MCMCAutocorrSupport, BaseMCMC, BaseSampler): """ name = "emcee" _io = EmceeFile - burn_in_class = MCMCBurnInTests + burn_in_class = EnsembleMCMCBurnInTests def __init__(self, model, nwalkers, checkpoint_interval=None, checkpoint_signal=None, @@ -87,7 +89,7 @@ def __init__(self, model, nwalkers, pool.count = nprocesses # set up emcee - self._nwalkers = nwalkers + self.nwalkers = nwalkers ndim = len(model.variable_params) self._sampler = emcee.EnsembleSampler(nwalkers, ndim, model_call, pool=pool) @@ -175,7 +177,8 @@ def write_results(self, filename): """ with self.io(filename, 'a') as fp: # write samples - fp.write_samples(self.samples, self.model.variable_params, + fp.write_samples(self.samples, + parameters=self.model.variable_params, last_iteration=self.niterations) # write stats fp.write_samples(self.model_stats, @@ -190,6 +193,52 @@ def finalize(self): this just passes.""" pass + @staticmethod + def compute_acf(filename, **kwargs): + r"""Computes the autocorrelation function. + + Calls :py:func:`base_mcmc.ensemble_compute_acf`; see that + function for details. + + Parameters + ---------- + filename : str + Name of a samples file to compute ACFs for. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_mcmc.ensemble_compute_acf`. + + Returns + ------- + dict : + Dictionary of arrays giving the ACFs for each parameter. If + ``per-walker`` is True, the arrays will have shape + ``nwalkers x niterations``. + """ + return ensemble_compute_acf(filename, **kwargs) + + @staticmethod + def compute_acl(filename, **kwargs): + r"""Computes the autocorrelation length. + + Calls :py:func:`base_mcmc.ensemble_compute_acl`; see that + function for details. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_mcmc.ensemble_compute_acf`. + + Returns + ------- + dict + A dictionary giving the ACL for each parameter. 
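As a quick illustration of the delegation described in this docstring (the checkpoint file name is a placeholder): the static method simply forwards to the module-level ensemble helper, and any keyword arguments are passed straight through.

    from pycbc.inference.sampler.emcee import EmceeEnsembleSampler
    from pycbc.inference.sampler.base_mcmc import ensemble_compute_acl

    # the two calls below should return dictionaries with the same keys
    acls_a = EmceeEnsembleSampler.compute_acl('samples.hdf')
    acls_b = ensemble_compute_acl('samples.hdf')
    assert set(acls_a) == set(acls_b)

    # kwargs are forwarded unchanged to the module-level function
    acls = EmceeEnsembleSampler.compute_acl('samples.hdf', min_nsamples=20)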
+ """ + return ensemble_compute_acl(filename, **kwargs) + @classmethod def from_config(cls, cp, model, output_file=None, nprocesses=1, use_mpi=False): diff --git a/pycbc/inference/sampler/emcee_pt.py b/pycbc/inference/sampler/emcee_pt.py index 38ce9a694b6..82d0120118e 100644 --- a/pycbc/inference/sampler/emcee_pt.py +++ b/pycbc/inference/sampler/emcee_pt.py @@ -27,17 +27,17 @@ from pycbc.pool import choose_pool from .base import (BaseSampler, setup_output) -from .base_mcmc import (BaseMCMC, raw_samples_to_dict, +from .base_mcmc import (BaseMCMC, EnsembleSupport, raw_samples_to_dict, get_optional_arg_from_config) from .base_multitemper import (MultiTemperedSupport, - MultiTemperedAutocorrSupport) -from ..burn_in import MultiTemperedMCMCBurnInTests + ensemble_compute_acf, ensemble_compute_acl) +from ..burn_in import EnsembleMultiTemperedMCMCBurnInTests from pycbc.inference.io import EmceePTFile from .. import models -class EmceePTSampler(MultiTemperedAutocorrSupport, MultiTemperedSupport, - BaseMCMC, BaseSampler): +class EmceePTSampler(MultiTemperedSupport, EnsembleSupport, BaseMCMC, + BaseSampler): """This class is used to construct a parallel-tempered MCMC sampler from the emcee package's PTSampler. @@ -66,7 +66,7 @@ class EmceePTSampler(MultiTemperedAutocorrSupport, MultiTemperedSupport, """ name = "emcee_pt" _io = EmceePTFile - burn_in_class = MultiTemperedMCMCBurnInTests + burn_in_class = EnsembleMultiTemperedMCMCBurnInTests def __init__(self, model, ntemps, nwalkers, betas=None, checkpoint_interval=None, checkpoint_signal=None, @@ -102,7 +102,7 @@ def __init__(self, model, ntemps, nwalkers, betas=None, self._sampler = emcee.PTSampler(ntemps, nwalkers, ndim, model_call, prior_call, pool=pool, betas=betas) - self._nwalkers = nwalkers + self.nwalkers = nwalkers self._ntemps = ntemps self._checkpoint_interval = checkpoint_interval self._checkpoint_signal = checkpoint_signal @@ -119,6 +119,53 @@ def base_shape(self): def betas(self): return self._sampler.betas + @staticmethod + def compute_acf(filename, **kwargs): + r"""Computes the autocorrelation function. + + Calls :py:func:`base_multitemper.ensemble_compute_acf`; see that + function for details. + + Parameters + ---------- + filename : str + Name of a samples file to compute ACFs for. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.ensemble_compute_acf`. + + Returns + ------- + dict : + Dictionary of arrays giving the ACFs for each parameter. If + ``per-walker=True`` is passed as a keyword argument, the arrays + will have shape ``ntemps x nwalkers x niterations``. Otherwise, the + returned array will have shape ``ntemps x niterations``. + """ + return ensemble_compute_acf(filename, **kwargs) + + @staticmethod + def compute_acl(filename, **kwargs): + r"""Computes the autocorrelation length. + + Calls :py:func:`base_multitemper.ensemble_compute_acl`; see that + function for details. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.ensemble_compute_acl`. + + Returns + ------- + dict + A dictionary of ntemps-long arrays of the ACLs of each parameter. 
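A small sketch (placeholder file name) of one way the ntemps-long ACL arrays returned here could be reduced; index 0 is the coldest temperature, as noted in the docstrings above, so the maximum of the zero-temperature entries gives a single ACL for the cold chain:

    from pycbc.inference.sampler.emcee_pt import EmceePTSampler

    acls = EmceePTSampler.compute_acl('samples.hdf')
    # each value is an ntemps-long array; take the coldest-temperature entry
    coldest_acl = max(acl[0] for acl in acls.values())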
+ """ + return ensemble_compute_acl(filename, **kwargs) + @classmethod def from_config(cls, cp, model, output_file=None, nprocesses=1, use_mpi=False): @@ -304,7 +351,8 @@ def write_results(self, filename): """ with self.io(filename, 'a') as fp: # write samples - fp.write_samples(self.samples, self.model.variable_params, + fp.write_samples(self.samples, + parameters=self.model.variable_params, last_iteration=self.niterations) # write stats fp.write_samples(self.model_stats, last_iteration=self.niterations) diff --git a/pycbc/inference/sampler/epsie.py b/pycbc/inference/sampler/epsie.py index 30833c1b6d7..cde12adc428 100644 --- a/pycbc/inference/sampler/epsie.py +++ b/pycbc/inference/sampler/epsie.py @@ -29,25 +29,25 @@ from pycbc.pool import choose_pool from .base import (BaseSampler, setup_output) -from .base_mcmc import (BaseMCMC, get_optional_arg_from_config) -from .base_multitemper import (MultiTemperedSupport, - MultiTemperedAutocorrSupport) +from .base_mcmc import (BaseMCMC, get_optional_arg_from_config, + nsamples_in_chain) +from .base_multitemper import (MultiTemperedSupport, compute_acf, compute_acl, + acl_from_raw_acls) from ..burn_in import MultiTemperedMCMCBurnInTests from ..jump import epsie_proposals_from_config from ..io import EpsieFile from .. import models -class EpsieSampler(MultiTemperedAutocorrSupport, MultiTemperedSupport, - BaseMCMC, BaseSampler): +class EpsieSampler(MultiTemperedSupport, BaseMCMC, BaseSampler): """Constructs an MCMC sampler using epsie's parallel-tempered sampler. Parameters ---------- model : model A model from ``pycbc.inference.models``. - nwalkers : int - Number of walkers to use in the sampler. + nchains : int + Number of chains to use in the sampler. ntemps : int, optional Number of temperatures to use in the sampler. A geometrically-spaced temperature ladder with the gievn number of levels will be constructed @@ -121,7 +121,7 @@ def __init__(self, model, nchains, ntemps=None, betas=None, default_proposal_args=default_proposal_args, seed=seed, pool=pool) # set other parameters - self._nwalkers = nchains + self.nchains = nchains self._ntemps = ntemps self._checkpoint_interval = checkpoint_interval self._checkpoint_signal = checkpoint_signal @@ -134,11 +134,6 @@ def io(self): def base_shape(self): return (self.ntemps, self.nchains,) - @property - def nchains(self): - """Alias for ``nwalkers``.""" - return self._nwalkers - @property def betas(self): """The inverse temperatures being used.""" @@ -157,6 +152,77 @@ def swap_interval(self): """Number of iterations between temperature swaps.""" return self._sampler.swap_interval + @staticmethod + def compute_acf(filename, **kwargs): + r"""Computes the autocorrelation function. + + Calls :py:func:`base_multitemper.compute_acf`; see that + function for details. + + Parameters + ---------- + filename : str + Name of a samples file to compute ACFs for. + \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.compute_acf`. + + Returns + ------- + dict : + Dictionary of arrays giving the ACFs for each parameter. The arrays + will have shape ``ntemps x nchains x niterations``. + """ + return compute_acf(filename, **kwargs) + + @staticmethod + def compute_acl(filename, **kwargs): + r"""Computes the autocorrelation length. + + Calls :py:func:`base_multitemper.compute_acl`; see that + function for details. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. 
+ \**kwargs : + All other keyword arguments are passed to + :py:func:`base_multitemper.compute_acl`. + + Returns + ------- + dict + A dictionary of ntemps-long arrays of the ACLs of each parameter. + """ + return compute_acl(filename, **kwargs) + + @property + def acl(self): # pylint: disable=invalid-overridden-method + """The autocorrelation lengths of the chains. + """ + return acl_from_raw_acls(self.raw_acls) + + @property + def effective_nsamples(self): # pylint: disable=invalid-overridden-method + """The effective number of samples post burn-in that the sampler has + acquired so far. + """ + act = self.act + if act is None: + act = numpy.inf + if self.burn_in is None: + start_iter = 0 + else: + start_iter = self.burn_in.burn_in_iteration + nperchain = nsamples_in_chain(start_iter, act, self.niterations) + if self.burn_in is not None: + # ensure that any chain not burned in has zero samples + nperchain[~self.burn_in.is_burned_in] = 0 + # and that any chain that is burned in has at least one sample + nperchain[self.burn_in.is_burned_in & (nperchain < 1)] = 1 + return int(nperchain.sum()) + @property def samples(self): """A dict mapping ``variable_params`` to arrays of samples currently @@ -240,7 +306,8 @@ def write_results(self, filename): """ with self.io(filename, 'a') as fp: # write samples - fp.write_samples(self.samples, self.model.variable_params, + fp.write_samples(self.samples, + parameters=self.model.variable_params, last_iteration=self.niterations) # write stats fp.write_samples(self.model_stats, last_iteration=self.niterations) From 31102a2b969ca5638451f4505d7fa5f779934403 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Mon, 22 Jun 2020 17:18:08 +0200 Subject: [PATCH 03/68] Avoid numpy 1.19 (#3328) * try fixing numpy to 1.18.5 for py37 * add more numpy settings; make it <1.19 rather than pinning to 18.5 --- pyproject.toml | 2 +- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 10939bfc017..53433932a71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,4 +2,4 @@ requires = ["setuptools", "wheel", "cython>=0.29", - "numpy>=1.16.0"] + "numpy>=1.16.0,<1.19"] diff --git a/requirements.txt b/requirements.txt index 2a9a2599a19..33bd8872e1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ decorator>=3.4.2 scipy>=0.16.0; python_version >= '3.5' scipy>=0.16.0,<1.3.0; python_version <= '3.4' matplotlib>=2.0.0 -numpy>=1.16.0; python_version >= '3.5' +numpy>=1.16.0,<1.19; python_version >= '3.5' numpy>=1.16.0,<1.17.0; python_version <= '3.4' pillow h5py<2.10.0 diff --git a/setup.py b/setup.py index d320bfff506..608bfc93367 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ install_requires = setup_requires + ['Mako>=1.0.1', 'cython>=0.29', 'decorator>=3.4.2', - 'numpy>=1.16.0; python_version >= "3.5"', + 'numpy>=1.16.0,<1.19; python_version >= "3.5"', 'numpy>=1.16.0,<1.17.0; python_version <= "2.7"', 'scipy>=0.16.0; python_version >= "3.5"', 'scipy>=0.16.0,<1.3.0; python_version <= "3.4"', From 0dd17b2047a5f867aabf42a9d7131c31b17f73d4 Mon Sep 17 00:00:00 2001 From: Gareth S Davies Date: Mon, 22 Jun 2020 18:03:00 +0200 Subject: [PATCH 04/68] give optimal_snr=0 for vetoed ifo for injections (#3321) --- bin/hdfcoinc/pycbc_coinc_hdfinjfind | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/hdfcoinc/pycbc_coinc_hdfinjfind b/bin/hdfcoinc/pycbc_coinc_hdfinjfind index 6d69bd2fa95..a75f80b1d99 100755 --- a/bin/hdfcoinc/pycbc_coinc_hdfinjfind +++ 
b/bin/hdfcoinc/pycbc_coinc_hdfinjfind @@ -222,8 +222,15 @@ for trigger_file, injection_file in zip(args.trigger_files, # pick up optimal SNRs if multi_ifo_style: for ifo, column in args.optimal_snr_column.items(): + optimal_snr_all = numpy.array(sim_table.get_column(column)) + # As a single detector being vetoed won't veto all combinations, + # need to set optimal_snr of a vetoed ifo to zero in order + # to later calculate decisive optimal snr + iws, _ = indices_within_segments(inj_time, [args.veto_file], ifo=ifo, + segment_name=args.segment_name) + optimal_snr_all[iws] = 0 hdf_append(fo, 'injections/optimal_snr_%s' % ifo, - sim_table.get_column(column)) + optimal_snr_all) else: ifo_map = {f.attrs['detector_1']: 1, f.attrs['detector_2']: 2} From d77fafd1db7731d2ec1a652a5b02ac4f975a39e0 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Mon, 22 Jun 2020 18:37:57 +0200 Subject: [PATCH 05/68] update plot samples to use chains or walkers (#3323) --- bin/inference/pycbc_inference_plot_samples | 58 ++++++++++++---------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/bin/inference/pycbc_inference_plot_samples b/bin/inference/pycbc_inference_plot_samples index 93778332905..4ece1a64ac0 100644 --- a/bin/inference/pycbc_inference_plot_samples +++ b/bin/inference/pycbc_inference_plot_samples @@ -32,15 +32,16 @@ from pycbc.inference import (option_utils, io) import sys # command line usage -parser = argparse.parser = io.ResultsArgumentParser(skip_args=['walkers']) +parser = argparse.parser = io.ResultsArgumentParser( + skip_args=['chains', 'iteration']) parser.add_argument("--verbose", action="store_true", default=False, help="Print logging info.") parser.add_argument("--version", action="version", version=__version__, help="show version number and exit") -parser.add_argument("--walkers", nargs='+', default=None, - help="Walker indices to plot. Options are 'all' or one " - "or more walker indices. Default is to plot the " - "average of all walkers for the input " +parser.add_argument("--chains", nargs='+', default=None, + help="Chain/walker indices to plot. Options are 'all' or " + "one or more chain indices. 
Default is to plot the " + "average of all chains for the input " "`--parameters`.") parser.add_argument("--output-file", type=str, required=True, help="Path to output plot.") @@ -57,11 +58,11 @@ fp, parameters, labels, _ = io.results_from_cli(opts, load_samples=False) # get number of dimensions ndim = len(parameters) -# get walker indices -if opts.walkers == ['all'] or opts.walkers == None: - walkers = range(fp.nwalkers) +# get chain indices +if opts.chains == ['all'] or opts.chains == None: + chains = range(fp.nchains) else: - walkers = list(map(int, opts.walkers)) + chains = list(map(int, opts.chains)) # plot samples # plot each parameter as a different subplot @@ -91,29 +92,36 @@ else: thinned_by = fp.thinned_by*xint xmin = xmin*fp.thinned_by +# create the kwargs to load samples +kwargs = {'thin_start': opts.thin_start, + 'thin_interval': opts.thin_interval, + 'thin_end': opts.thin_end} # add the temperature args if it exists -additional_args = {} try: - additional_args['temps'] = opts.temps + kwargs['temps'] = opts.temps except AttributeError: pass for i, arg in enumerate(parameters): chains_arg = [] - for widx in walkers: - chain = fp.read_samples(parameters, walkers=widx, - thin_start=opts.thin_start, - thin_interval=opts.thin_interval, - thin_end=opts.thin_end, **additional_args) + for cidx in chains: + kwargs['chains'] = cidx + try: + chain = fp.read_samples(parameters, **kwargs) + except TypeError: + # will get this if ensemble sampler; change "chains" to "walkers" + kwargs['walkers'] = kwargs.pop('chains') + chain = fp.read_samples(parameters, **kwargs) chains_arg.append(chain[arg]) - if opts.walkers is not None: + if opts.chains is not None: for chain in chains_arg: - # plot each walker as a different line on the subplot - axs[i].plot((numpy.arange(len(chain)))*thinned_by + xmin, chain, alpha=0.6) + # plot each chain as a different line on the subplot + axs[i].plot((numpy.arange(len(chain)))*thinned_by + xmin, chain, + alpha=0.6) else: - # plot the average of all walkers for the parameter on the subplot + # plot the average of all chains for the parameter on the subplot chains_arg = numpy.array(chains_arg) - avg_chain = [chains_arg[:, j].sum()/fp.nwalkers + avg_chain = [chains_arg[:, j].sum()/fp.nchains for j in range(len(chains_arg[0]))] axs[i].plot((numpy.arange(len(avg_chain)))*thinned_by + xmin, avg_chain) # Set y labels @@ -124,10 +132,10 @@ fp.close() caption_kwargs = { "parameters" : ", ".join(sorted(list(labels.values()))), } -caption = r"""Parameter samples from the walker chains whose indices were -provided as inputs. Each line is a different chain of walker samples in that -case. If no walker indices were provided, the plot shows the variation of the -parameter sample values averaged over all walkers.""" +caption = r"""Parameter samples from the chains whose indices were +provided as inputs. Each line is a different chain of samples in that +case. 
If no chain indices were provided, the plot shows the variation of the +parameter sample values averaged over all chains.""" title = "Samples for {parameters}".format(**caption_kwargs) results.save_fig_with_metadata(fig, opts.output_file, cmd=" ".join(sys.argv), From fdaaf42c9c713ef8d1adc9d2fc826027bb345c64 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Mon, 22 Jun 2020 19:03:36 +0200 Subject: [PATCH 06/68] fix plot_gelman rubin for python 3 (#3324) --- pycbc/inference/gelman_rubin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pycbc/inference/gelman_rubin.py b/pycbc/inference/gelman_rubin.py index 269f59ef92e..9433fd8f06c 100644 --- a/pycbc/inference/gelman_rubin.py +++ b/pycbc/inference/gelman_rubin.py @@ -16,6 +16,7 @@ diagnostic statistic. """ +from __future__ import division import numpy @@ -94,7 +95,7 @@ def gelman_rubin(chains, auto_burn_in=True): # this will have shape (nchains, nparameters, niterations) if auto_burn_in: _, _, niterations = numpy.array(chains).shape - chains = numpy.array([chain[:, niterations / 2 + 1:] + chains = numpy.array([chain[:, niterations // 2 + 1:] for chain in chains]) # get number of chains, parameters, and iterations From c8d47ea718755b68fc5cad339a118734810f0554 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Mon, 22 Jun 2020 19:31:22 +0200 Subject: [PATCH 07/68] Add spherical to cartesian transform (#3325) * add generic spherical <-> cartesian transforms; deprecation warnings for spin12 transforms * fix typo, use sphericaltocartesian in common cbc transforms * fix typos, register transform * update example ini file * initialize cartesian to spherical with right arguments in test * fix bug in inverse transform * pep8 --- examples/inference/priors/gw150914_like.ini | 22 ++ pycbc/transforms.py | 240 ++++++++++++++------ test/test_transforms.py | 8 +- 3 files changed, 199 insertions(+), 71 deletions(-) diff --git a/examples/inference/priors/gw150914_like.ini b/examples/inference/priors/gw150914_like.ini index 7b0667e45f2..2e634af3e4d 100644 --- a/examples/inference/priors/gw150914_like.ini +++ b/examples/inference/priors/gw150914_like.ini @@ -71,6 +71,28 @@ name = uniform_solidangle polar-angle = spin2_polar azimuthal-angle = spin2_azimuthal +; The waveform generator expects spins to be in cartesian coordinates, with +; names spin(1|2)(x|y|z). We therefore need to provide a waveform transform +; that converts the spherical coordinates that we have defined the spin prior +; in to cartesian coordinates. +[waveform_transforms-spin1x+spin1y+spin1z] +name = spherical_to_cartesian +x = spin1x +y = spin1y +z = spin1z +radial = spin1_a +polar = spin1_polar +azimuthal = spin1_azimuthal + +[waveform_transforms-spin2x+spin2y+spin2z] +name = spherical_to_cartesian +x = spin2x +y = spin2y +z = spin2z +radial = spin2_a +polar = spin2_polar +azimuthal = spin2_azimuthal + [prior-distance] ; following gives a uniform volume prior name = uniform_radius diff --git a/pycbc/transforms.py b/pycbc/transforms.py index 1a205ee8c15..57249e979cf 100644 --- a/pycbc/transforms.py +++ b/pycbc/transforms.py @@ -614,18 +614,39 @@ def inverse_jacobian(self, maps): return (2.**(-1./5) * self.ref_mass / mchirp)**(5./6) -class SphericalSpin1ToCartesianSpin1(BaseTransform): - """ Converts spherical spin parameters (magnitude and two angles) to - catesian spin parameters. This class only transforms spsins for the first - component mass. +class SphericalToCartesian(BaseTransform): + """Converts spherical coordinates to cartesian. 
+ + Parameters + ---------- + x : str + The name of the x parameter. + y : str + The name of the y parameter. + z : str + The name of the z parameter. + radial : str + The name of the radial parameter. + azimuthal : str + The name of the azimuthal angle parameter. + polar : str + The name of the polar angle parameter. """ - name = "spherical_spin_1_to_cartesian_spin_1" - _inputs = [parameters.spin1_a, parameters.spin1_azimuthal, - parameters.spin1_polar] - _outputs = [parameters.spin1x, parameters.spin1y, parameters.spin1z] + name = "spherical_to_cartesian" + + def __init__(self, x, y, z, radial, azimuthal, polar): + self.x = x + self.y = y + self.z = z + self.radial = radial + self.polar = polar + self.azimuthal = azimuthal + self._inputs = [self.radial, self.azimuthal, self.polar] + self._outputs = [self.x, self.y, self.z] + super(SphericalToCartesian, self).__init__() def transform(self, maps): - """ This function transforms from spherical to cartesian spins. + """This function transforms from spherical to cartesian spins. Parameters ---------- @@ -637,10 +658,13 @@ def transform(self, maps): >>> import numpy >>> from pycbc import transforms - >>> t = transforms.SphericalSpin1ToCartesianSpin1() - >>> t.transform({'spin1_a': numpy.array([0.1]), 'spin1_azimuthal': numpy.array([0.1]), 'spin1_polar': numpy.array([0.1])}) - {'spin1_a': array([ 0.1]), 'spin1_azimuthal': array([ 0.1]), 'spin1_polar': array([ 0.1]), - 'spin2x': array([ 0.00993347]), 'spin2y': array([ 0.00099667]), 'spin2z': array([ 0.09950042])} + >>> t = transforms.SphericalToCartesian('x', 'y', 'z', + 'a', 'phi', 'theta') + >>> t.transform({'a': numpy.array([0.1]), 'phi': numpy.array([0.1]), + 'theta': numpy.array([0.1])}) + {'a': array([ 0.1]), 'phi': array([ 0.1]), 'theta': array([ 0.1]), + 'x': array([ 0.00993347]), 'y': array([ 0.00099667]), + 'z': array([ 0.09950042])} Returns ------- @@ -648,13 +672,16 @@ def transform(self, maps): A dict with key as parameter name and value as numpy.array or float of transformed values. """ - a, az, po = self._inputs - data = coordinates.spherical_to_cartesian(maps[a], maps[az], maps[po]) - out = {param : val for param, val in zip(self._outputs, data)} + a = self.radial + az = self.azimuthal + po = self.polar + x, y, z = coordinates.spherical_to_cartesian(maps[a], maps[az], + maps[po]) + out = {self.x: x, self.y: y, self.z: z} return self.format_output(maps, out) def inverse_transform(self, maps): - """ This function transforms from cartesian to spherical spins. + """This function transforms from cartesian to spherical spins. Parameters ---------- @@ -666,21 +693,57 @@ def inverse_transform(self, maps): A dict with key as parameter name and value as numpy.array or float of transformed values. """ - sx, sy, sz = self._outputs - data = coordinates.cartesian_to_spherical(maps[sx], maps[sy], maps[sz]) - out = {param : val for param, val in zip(self._outputs, data)} + x = self.x + y = self.y + z = self.z + a, az, po = coordinates.cartesian_to_spherical(maps[x], maps[y], + maps[z]) + out = {self.radial: a, self.azimuthal: az, self.polar: po} return self.format_output(maps, out) -class SphericalSpin2ToCartesianSpin2(SphericalSpin1ToCartesianSpin1): - """ Converts spherical spin parameters (magnitude and two angles) to - cartesian spin parameters. This class only transforms spins for the second +class SphericalSpin1ToCartesianSpin1(SphericalToCartesian): + """Converts spherical spin parameters (radial and two angles) to + catesian spin parameters. 
This class only transforms spsins for the first + component mass. + + **Deprecation Warning:** This will be removed in a future update. Use + :py:class:`SphericalToCartesian` with spin-parameter names passed in + instead. + """ + name = "spherical_spin_1_to_cartesian_spin_1" + + def __init__(self): + logging.warning("Deprecation warning: the {} transform will be " + "removed in a future update. Please use {} instead, " + "passing spin1x, spin1y, spin1z, spin1_a, " + "spin1_azimuthal, spin1_polar as arguments." + .format(self.name, SphericalToCartesian.name)) + super(SphericalSpin1ToCartesianSpin1, self).__init__( + "spin1x", "spin1y", "spin1z", "spin1_a", "spin1_azimuthal", + "spin1_polar") + + +class SphericalSpin2ToCartesianSpin2(SphericalToCartesian): + """Converts spherical spin parameters (radial and two angles) to + catesian spin parameters. This class only transforms spsins for the first component mass. + + **Deprecation Warning:** This will be removed in a future update. Use + :py:class:`SphericalToCartesian` with spin-parameter names passed in + instead. """ name = "spherical_spin_2_to_cartesian_spin_2" - _inputs = [parameters.spin2_a, parameters.spin2_azimuthal, - parameters.spin2_polar] - _outputs = [parameters.spin2x, parameters.spin2y, parameters.spin2z] + + def __init__(self): + logging.warning("Deprecation warning: the {} transform will be " + "removed in a future update. Please use {} instead, " + "passing spin2x, spin2y, spin2z, spin2_a, " + "spin2_azimuthal, spin2_polar as arguments." + .format(self.name, SphericalToCartesian.name)) + super(SphericalSpin2ToCartesianSpin2, self).__init__( + "spin2x", "spin2y", "spin2z", "spin2_a", "spin2_azimuthal", + "spin2_polar") class DistanceToRedshift(BaseTransform): @@ -1673,60 +1736,80 @@ class DistanceToChirpDistance(ChirpDistanceToDistance): inverse_jacobian = inverse.jacobian -class CartesianSpin1ToSphericalSpin1(SphericalSpin1ToCartesianSpin1): - """The inverse of SphericalSpin1ToCartesianSpin1. +class CartesianToSpherical(SphericalToCartesian): + """Converts spherical coordinates to cartesian. + + Parameters + ---------- + x : str + The name of the x parameter. + y : str + The name of the y parameter. + z : str + The name of the z parameter. + radial : str + The name of the radial parameter. + azimuthal : str + The name of the azimuthal angle parameter. + polar : str + The name of the polar angle parameter. """ - name = "cartesian_spin_1_to_spherical_spin_1" - inverse = SphericalSpin1ToCartesianSpin1 - _inputs = inverse._outputs - _outputs = inverse._inputs + name = "cartesian_to_spherical" + inverse = SphericalToCartesian + transform = inverse.inverse_transform + inverse_transform = inverse.transform jacobian = inverse.inverse_jacobian inverse_jacobian = inverse.jacobian - def transform(self, maps): - """ This function transforms from cartesian to spherical spins. - - Parameters - ---------- - maps : a mapping object + def __init__(self, *args): + super(CartesianToSpherical, self).__init__(*args) + # swap inputs and outputs + outputs = self._inputs + inputs = self._outputs + self._inputs = inputs + self._outputs = outputs + self.inputs = set(self._inputs) + self.outputs = set(self._outputs) - Returns - ------- - out : dict - A dict with key as parameter name and value as numpy.array or float - of transformed values. 
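To complement the doctest shown earlier for the forward transform, here is a minimal sketch of the new generic inverse. The parameter names 'x', 'y', 'z', 'a', 'phi', 'theta' are illustrative and mirror the forward example; the input values are the outputs of that example, so the spherical coordinates of roughly 0.1 should be recovered.

    import numpy
    from pycbc import transforms

    t = transforms.CartesianToSpherical('x', 'y', 'z', 'a', 'phi', 'theta')
    out = t.transform({'x': numpy.array([0.00993347]),
                       'y': numpy.array([0.00099667]),
                       'z': numpy.array([0.09950042])})
    # out should recover approximately a = 0.1, phi = 0.1, theta = 0.1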
- """ - sx, sy, sz = self._inputs - data = coordinates.cartesian_to_spherical(maps[sx], maps[sy], maps[sz]) - out = {param : val for param, val in zip(self._outputs, data)} - return self.format_output(maps, out) - def inverse_transform(self, maps): - """ This function transforms from spherical to cartesian spins. +class CartesianSpin1ToSphericalSpin1(CartesianToSpherical): + """The inverse of SphericalSpin1ToCartesianSpin1. - Parameters - ---------- - maps : a mapping object + **Deprecation Warning:** This will be removed in a future update. Use + :py:class:`CartesianToSpherical` with spin-parameter names passed in + instead. + """ + name = "cartesian_spin_1_to_spherical_spin_1" - Returns - ------- - out : dict - A dict with key as parameter name and value as numpy.array or float - of transformed values. - """ - a, az, po = self._outputs - data = coordinates.spherical_to_cartesian(maps[a], maps[az], maps[po]) - out = {param : val for param, val in zip(self._outputs, data)} - return self.format_output(maps, out) + def __init__(self): + logging.warning("Deprecation warning: the {} transform will be " + "removed in a future update. Please use {} instead, " + "passing spin1x, spin1y, spin1z, spin1_a, " + "spin1_azimuthal, spin1_polar as arguments." + .format(self.name, CartesianToSpherical.name)) + super(CartesianSpin1ToSphericalSpin1, self).__init__( + "spin1x", "spin1y", "spin1z", "spin1_a", "spin1_azimuthal", + "spin1_polar") -class CartesianSpin2ToSphericalSpin2(CartesianSpin1ToSphericalSpin1): +class CartesianSpin2ToSphericalSpin2(CartesianToSpherical): """The inverse of SphericalSpin2ToCartesianSpin2. + + **Deprecation Warning:** This will be removed in a future update. Use + :py:class:`CartesianToSpherical` with spin-parameter names passed in + instead. """ name = "cartesian_spin_2_to_spherical_spin_2" - inverse = SphericalSpin2ToCartesianSpin2 - _inputs = inverse._outputs - _outputs = inverse._inputs + + def __init__(self): + logging.warning("Deprecation warning: the {} transform will be " + "removed in a future update. Please use {} instead, " + "passing spin2x, spin2y, spin2z, spin2_a, " + "spin2_azimuthal, spin2_polar as arguments." 
+ .format(self.name, CartesianToSpherical.name)) + super(CartesianSpin2ToSphericalSpin2, self).__init__( + "spin2x", "spin2y", "spin2z", "spin2_a", "spin2_azimuthal", + "spin2_polar") class CartesianSpinToAlignedMassSpin(AlignedMassSpinToCartesianSpin): @@ -1897,6 +1980,7 @@ def from_config(cls, cp, section, outputs, skip_opts=None, # set the inverse of the forward transforms to the inverse transforms MchirpQToMass1Mass2.inverse = Mass1Mass2ToMchirpQ ChirpDistanceToDistance.inverse = DistanceToChirpDistance +SphericalToCartesian.inverse = CartesianToSpherical SphericalSpin1ToCartesianSpin1.inverse = CartesianSpin1ToSphericalSpin1 SphericalSpin2ToCartesianSpin2.inverse = CartesianSpin2ToSphericalSpin2 AlignedMassSpinToCartesianSpin.inverse = CartesianSpinToAlignedMassSpin @@ -1923,6 +2007,8 @@ def from_config(cls, cp, section, outputs, skip_opts=None, Mass1Mass2ToMchirpEta.name : Mass1Mass2ToMchirpEta, ChirpDistanceToDistance.name : ChirpDistanceToDistance, DistanceToChirpDistance.name : DistanceToChirpDistance, + SphericalToCartesian.name : SphericalToCartesian, + CartesianToSpherical.name : CartesianToSpherical, SphericalSpin1ToCartesianSpin1.name : SphericalSpin1ToCartesianSpin1, CartesianSpin1ToSphericalSpin1.name : CartesianSpin1ToSphericalSpin1, SphericalSpin2ToCartesianSpin2.name : SphericalSpin2ToCartesianSpin2, @@ -1947,13 +2033,27 @@ def from_config(cls, cp, section, outputs, skip_opts=None, # to coordinates understood by the waveform generator common_cbc_forward_transforms = [ MchirpQToMass1Mass2(), DistanceToRedshift(), - SphericalSpin1ToCartesianSpin1(), SphericalSpin2ToCartesianSpin2(), + SphericalToCartesian(parameters.spin1x, parameters.spin1y, + parameters.spin1z, parameters.spin1_a, + parameters.spin1_azimuthal, parameters.spin1_polar), + SphericalToCartesian(parameters.spin2x, parameters.spin2y, + parameters.spin2z, parameters.spin2_a, + parameters.spin2_azimuthal, parameters.spin2_polar), AlignedMassSpinToCartesianSpin(), PrecessionMassSpinToCartesianSpin(), ChiPToCartesianSpin(), ChirpDistanceToDistance() ] common_cbc_inverse_transforms = [_t.inverse() - for _t in common_cbc_forward_transforms - if _t.inverse is not None] + for _t in common_cbc_forward_transforms + if not (_t.inverse is None or + _t.name == 'spherical_to_cartesian')] +common_cbc_inverse_transforms.extend([ + CartesianToSpherical(parameters.spin1x, parameters.spin1y, + parameters.spin1z, parameters.spin1_a, + parameters.spin1_azimuthal, parameters.spin1_polar), + CartesianToSpherical(parameters.spin2x, parameters.spin2y, + parameters.spin2z, parameters.spin2_a, + parameters.spin2_azimuthal, parameters.spin2_polar)]) + common_cbc_transforms = common_cbc_forward_transforms + \ common_cbc_inverse_transforms diff --git a/test/test_transforms.py b/test/test_transforms.py index d7ca9ac4182..c5cd0df63e9 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -67,7 +67,13 @@ def test_inverse(self): # check if inverse exists if trans.name in IGNORE: continue - inv = trans.inverse() + if trans.name == 'spherical_to_cartesian': + # spherical to cartesian requires the cartesian and spherical + # parameter names to be specified, which we can get from + # the inputs and outputs + inv = trans.inverse(*trans._outputs+trans._inputs) + else: + inv = trans.inverse() # generate some random points in_map = {p : numpy.random.uniform(*RANGES[p]) From 21dff97bd86f420d007abb0f82235d269796611f Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Mon, 22 Jun 2020 21:21:05 +0200 Subject: [PATCH 08/68] Register some more 
parameter labels (#3331)

* register some new parameter labels

* fix seconds in delta tc
---
 pycbc/waveform/parameters.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/pycbc/waveform/parameters.py b/pycbc/waveform/parameters.py
index d6dca185c39..50beab625e3 100644
--- a/pycbc/waveform/parameters.py
+++ b/pycbc/waveform/parameters.py
@@ -193,7 +193,7 @@ def docstr(self, prefix='', include_label=True):
                      dtype=float, default=0., label=r"$e$",
                      description="Eccentricity.")
 
-# derived parameters (these are not used for waveform generation) for masses
+# derived parameters (these are not used for waveform generation)
 mchirp = Parameter("mchirp",
                    dtype=float, label=r"$\mathcal{M}~(\mathrm{M}_\odot)$",
                    description="The chirp mass of the binary (in solar masses).")
@@ -204,8 +204,24 @@ def docstr(self, prefix='', include_label=True):
                    dtype=float, label=r"$M~(\mathrm{M}_\odot)$",
                    description="The total mass of the binary (in solar masses).")
 q = Parameter("q",
-              dtype=float, label=r"$q$",
-              description="The mass ratio, m1/m2, where m1 >= m2.")
+              dtype=float, label=r"$q$",
+              description="The mass ratio, m1/m2, where m1 >= m2.")
+srcmass1 = Parameter("srcmass1", dtype=float,
+                     label=r"$m_1^{\rm{src}}~(\mathrm{M}_\odot)$",
+                     description="The mass of the first component object in "
+                                 "the source frame (in solar masses).")
+srcmass2 = Parameter("srcmass2", dtype=float,
+                     label=r"$m_2^{\rm{src}}~(\mathrm{M}_\odot)$",
+                     description="The mass of the second component object in "
+                                 "the source frame (in solar masses).")
+srcmchirp = Parameter("srcmchirp", dtype=float,
+                      label=r"$\mathcal{M}^{\rm{src}}~(\mathrm{M}_\odot)$",
+                      description="The chirp mass of the binary in the "
+                                  "source frame (in solar masses).")
+srcmtotal = Parameter("srcmtotal", dtype=float,
+                      label=r"$M^{\rm{src}}~(\mathrm{M}_\odot)$",
+                      description="The total mass of the binary in the "
+                                  "source frame (in solar masses).")
 primary_mass = Parameter("primary_mass",
                          dtype=float, label=r"$m_{1}$",
                          description="Mass of the primary object (in solar masses).")
@@ -392,6 +408,9 @@ def docstr(self, prefix='', include_label=True):
 tc = Parameter("tc",
                dtype=float, default=None, label=r"$t_c$ (s)",
                description="Coalescence time (s).")
+delta_tc = Parameter("delta_tc", dtype=float,
+                     label=r"$\Delta t_c~(\rm{s})$",
+                     description="Coalescence time offset.")
 ra = Parameter("ra",
                dtype=float, default=None, label=r"$\alpha$",
                description="Right ascension (rad).")
@@ -404,6 +423,9 @@ def docstr(self, prefix='', include_label=True):
 redshift = Parameter("redshift",
                      dtype=float, default=None, label=r"$z$",
                      description="Redshift.")
+comoving_volume = Parameter("comoving_volume", dtype=float,
+                            label=r"$V_C~(\rm{Mpc}^3)$",
+                            description="Comoving volume (in cubic Mpc).")
 
 #
 #   Calibration parameters

From bf6ee4c4154c6dd0aeb94d4700a711948be7a879 Mon Sep 17 00:00:00 2001
From: Johannes Buchner
Date: Thu, 9 Apr 2020 20:22:57 +0200
Subject: [PATCH 09/68] add flexibility to UltraNest sampler

---
 pycbc/inference/sampler/ultranest.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/pycbc/inference/sampler/ultranest.py b/pycbc/inference/sampler/ultranest.py
index 6984337d4c0..3d886716861 100644
--- a/pycbc/inference/sampler/ultranest.py
+++ b/pycbc/inference/sampler/ultranest.py
@@ -52,11 +52,16 @@ class UltranestSampler(BaseSampler):
     ----------
     model : model
         A model from ``pycbc.inference.models``.
+    log_dir : str
+        Folder where files should be stored for resuming (optional).
+ stepsampling : bool + If false, uses rejection sampling. If true, uses + hit-and-run sampler, which scales better with dimensionality. """ name = "ultranest" _io = UltranestFile - def __init__(self, model, **kwargs): + def __init__(self, model, log_dir=None, stepsampling=False, **kwargs): super(UltranestSampler, self).__init__(model) import ultranest @@ -65,7 +70,12 @@ def __init__(self, model, **kwargs): self._sampler = ultranest.ReactiveNestedSampler( list(self.model.variable_params), log_likelihood_call, - prior_call) + prior_call, log_dir=log_dir, resume=True) + + if stepsampling: + import ultranest.stepsampler + self._sampler.stepsampler = ultranest.stepsampler.RegionBallSliceSampler( + nsteps=100, adaptive_nsteps='move-distance', region_filter=True) self.nlive = 0 self.ndim = len(self.model.variable_params) @@ -74,6 +84,9 @@ def __init__(self, model, **kwargs): def run(self): self.result = self._sampler.run(**self.kwargs) + self._sampler.print_results() + self._sampler.plot() + @property def io(self): From df506f4e6673fc6436566455cc619f617414859f Mon Sep 17 00:00:00 2001 From: Alexander Harvey Nitz Date: Mon, 22 Jun 2020 20:36:34 +0200 Subject: [PATCH 10/68] make plotting optional, enable setting by config file --- pycbc/inference/sampler/ultranest.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pycbc/inference/sampler/ultranest.py b/pycbc/inference/sampler/ultranest.py index 3d886716861..7985c0b2282 100644 --- a/pycbc/inference/sampler/ultranest.py +++ b/pycbc/inference/sampler/ultranest.py @@ -61,7 +61,10 @@ class UltranestSampler(BaseSampler): name = "ultranest" _io = UltranestFile - def __init__(self, model, log_dir=None, stepsampling=False, **kwargs): + def __init__(self, model, log_dir=None, + stepsampling=False, + enable_plots=False, + **kwargs): super(UltranestSampler, self).__init__(model) import ultranest @@ -71,12 +74,13 @@ def __init__(self, model, log_dir=None, stepsampling=False, **kwargs): list(self.model.variable_params), log_likelihood_call, prior_call, log_dir=log_dir, resume=True) - + if stepsampling: import ultranest.stepsampler self._sampler.stepsampler = ultranest.stepsampler.RegionBallSliceSampler( nsteps=100, adaptive_nsteps='move-distance', region_filter=True) + self.enable_plots = enable_plots self.nlive = 0 self.ndim = len(self.model.variable_params) self.result = None @@ -85,7 +89,9 @@ def __init__(self, model, log_dir=None, stepsampling=False, **kwargs): def run(self): self.result = self._sampler.run(**self.kwargs) self._sampler.print_results() - self._sampler.plot() + + if self.enable_plots: + self._sampler.plot() @property @@ -113,6 +119,9 @@ def from_config(cls, cp, model, output_file=None, **kwds): 'min_ess': int, 'max_iters': int, 'max_ncalls': int, + 'log_dir': str, + 'stepsampling': bool, + 'enable_plots': bool, 'max_num_improvement_loops': int, 'min_num_live_points': int, 'cluster_num_live_points:': int} From 9c63311e3bf8bee509c58824ab48e2e07f75d58b Mon Sep 17 00:00:00 2001 From: Alexander Harvey Nitz Date: Mon, 22 Jun 2020 20:41:39 +0200 Subject: [PATCH 11/68] cc --- pycbc/inference/sampler/ultranest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pycbc/inference/sampler/ultranest.py b/pycbc/inference/sampler/ultranest.py index 7985c0b2282..c3f09bb22f4 100644 --- a/pycbc/inference/sampler/ultranest.py +++ b/pycbc/inference/sampler/ultranest.py @@ -78,7 +78,8 @@ def __init__(self, model, log_dir=None, if stepsampling: import ultranest.stepsampler self._sampler.stepsampler = 
ultranest.stepsampler.RegionBallSliceSampler( - nsteps=100, adaptive_nsteps='move-distance', region_filter=True) + nsteps=100, adaptive_nsteps='move-distance', + region_filter=True) self.enable_plots = enable_plots self.nlive = 0 @@ -89,11 +90,10 @@ def __init__(self, model, log_dir=None, def run(self): self.result = self._sampler.run(**self.kwargs) self._sampler.print_results() - + if self.enable_plots: self._sampler.plot() - @property def io(self): return self._io From a0b553a838df811300d6bb46d203602a5818d17e Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Tue, 23 Jun 2020 15:05:53 +0200 Subject: [PATCH 12/68] Update setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 608bfc93367..c2ce7a011a8 100755 --- a/setup.py +++ b/setup.py @@ -124,8 +124,8 @@ def __getattr__(self, attr): vinfo = _version_helper.generate_git_version_info() except: vinfo = vdummy() - vinfo.version = '1.16.dev4' - vinfo.release = 'False' + vinfo.version = '1.16.4' + vinfo.release = 'True' with open('pycbc/version.py', 'w') as f: f.write("# coding: utf-8\n") From 2304406c1ddce25d51a7f27dd7658c69852130aa Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Tue, 23 Jun 2020 15:12:24 +0200 Subject: [PATCH 13/68] return to dev --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c2ce7a011a8..e814d99c08e 100755 --- a/setup.py +++ b/setup.py @@ -124,8 +124,8 @@ def __getattr__(self, attr): vinfo = _version_helper.generate_git_version_info() except: vinfo = vdummy() - vinfo.version = '1.16.4' - vinfo.release = 'True' + vinfo.version = '1.16.dev5' + vinfo.release = 'False' with open('pycbc/version.py', 'w') as f: f.write("# coding: utf-8\n") From 9416e6f8f3a919f79d6950f0472a5cd4e99e1c29 Mon Sep 17 00:00:00 2001 From: SSastro-GW <66710757+SSastro-GW@users.noreply.github.com> Date: Wed, 17 Jun 2020 09:07:12 +0530 Subject: [PATCH 14/68] LISA_detector --- pycbc/LISA_detector.py | 103 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 pycbc/LISA_detector.py diff --git a/pycbc/LISA_detector.py b/pycbc/LISA_detector.py new file mode 100644 index 00000000000..ed23ae96007 --- /dev/null +++ b/pycbc/LISA_detector.py @@ -0,0 +1,103 @@ +from numpy import sin,cos,pi +from astropy import coordinates,constants +from astropy import units as u +from astropy.time import Time +import numpy as np +import lal, lalsimulation + +#-----------------------------------------------------COORDIANTE TRANSFORMATION----------------------------------------------------- +def from_icrs_to_gcrs(icrs_coord): + if isinstance(icrs_coord,np.ndarray) and gcrs_coord.shape==(3,): + x,y,z=icrs_coord + return coordinates.SkyCoord(x,y,z,unit='AU',representation_type='cartesian',frame='icrs').transform_to('gcrs') + elif icrs_coord.frame is 'icrs': + return icrs_coord.transform_to('gcrs') + else : + raise RuntimeError("1") + +def from_gcrs_to_icrs(gcrs_coord): + if isinstance(gcrs_coord,np.ndarray) and gcrs_coord.shape==(3,):#Add unit section + x,y,z=gcrs_coord + return coordinates.SkyCoord(x,y,z,unit='AU',representation_type='cartesian',frame='gcrs').transform_to('icrs') + elif gcrs_coord.frame is 'gcrs': + return gcrs_coord.transform_to('icrs') + else : + raise RuntimeError("1") + +class LISA(object): + def __init__(self,t_gps,kappa,_lambda_): + self.t_gps=t_gps + self.kappa=kappa + self._lambda_=_lambda_ +#-----------------------------------------------------DETECTOR 
POSITION----------------------------------------------------- + def get_pos_detector(self,plot=False): + t=Time(val=self.t_gps,format='gps',scale='utc').to_datetime(timezone=None) + t_ref = np.array([2034-t.year,t.month/12,t.day/(12*365),t.hour/(12*365*24),t.minute/(12*365*24*60),t.second/(12*365*24*60*60),t.microsecond/(12*365*24*60*60*1e-6)]) + t_ref = np.sum(t_ref,axis=0) + n=np.array(range(1,4)) + alpha=2.*pi*t_ref/1+self.kappa + beta_n=(n-1)+2.*pi/3+self._lambda_ + a, L = 1.,.1 #*u.AU + e = L/(2.*a*np.sqrt(3)) + + # pos[0],pos[1],pos[2] = X, Y, Z for all 3 detectors at one time + + pos = np.array([a*cos(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n)-(1 + sin(alpha)**2)*cos(beta_n)), + a*sin(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n)-(1 + cos(alpha)**2)*sin(beta_n)), + -np.sqrt(3)*a*e*cos(alpha - beta_n)]) + + if plot: + + from mpl_toolkits.mplot3d import Axes3D + import matplotlib.pyplot as plt + + #ax.scatter(pos[0],pos[1],pos[2], marker='o')# X,Y,Z at current time + t=np.arange(0,10,.1) + x_1=a*cos(t) + a*e*(sin(t)*cos(t)*sin(beta_n[0])-(1 + sin(t)**2)*cos(beta_n[0])) + y_1=a*sin(t) + a*e*(sin(t)*cos(t)*sin(beta_n[0])-(1 + cos(t)**2)*sin(beta_n[0])) + z_1=-np.sqrt(3)*a*e*cos(t - beta_n[0]) + x_2=a*cos(t) + a*e*(sin(t)*cos(t)*sin(beta_n[1])-(1 + sin(t)**2)*cos(beta_n[1])) + y_2=a*sin(t) + a*e*(sin(t)*cos(t)*sin(beta_n[1])-(1 + cos(t)**2)*sin(beta_n[1])) + z_2=-np.sqrt(3)*a*e*cos(t - beta_n[1]) + x_3=a*cos(t) + a*e*(sin(t)*cos(t)*sin(beta_n[2])-(1 + sin(t)**2)*cos(beta_n[2])) + y_3=a*sin(t) + a*e*(sin(t)*cos(t)*sin(beta_n[2])-(1 + cos(t)**2)*sin(beta_n[2])) + z_3=-np.sqrt(3)*a*e*cos(t - beta_n[2]) + rand_pt=from_gcrs_to_icrs(np.zeros(3)) + t=Time(val=self.t_gps,format='gps',scale='utc') + sun=coordinates.get_sun(t).transform_to(frame='icrs') + sun.representation_type, rand_pt.representation_type='cartesian', 'cartesian' + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.scatter(np.float32(rand_pt.x),np.float32(rand_pt.y),np.float32(rand_pt.z),marker=',') + ax.scatter(np.float32(sun.x),np.float32(sun.y),np.float32(sun.z), marker='h') + ax.scatter(x_1,y_1,z_1, marker='o') + ax.scatter(x_2,y_2,z_2, marker='+') + ax.scatter(x_3,y_3,z_3, marker='*') + + coord_ICRS=coordinates.SkyCoord(pos[0],pos[1],pos[2],unit=u.AU,representation_type='cartesian',frame='icrs') + return coord_ICRS + #return np.array([x,y,z]) + +#--------------------------------------------------DISTANCE FROM DETECTOR------------------------------------------- + + def light_travel_time_to_detector(self,det,ref_time): + if isinstance(det,str): #if ref_time is None: + det_loc=from_gcrs_to_icrs(lalsimulation.DetectorPrefixToLALDetector('H1').location*6.6846e-12) + det_loc.representation_type='cartesian' + _a_=np.array([np.float32(det_loc.x),np.float32(det_loc.y),np.float32(det_loc.z)]) + L_pos=LISA(ref_time,self.kappa,self._lambda_).get_pos_detector() + _b_=np.array([np.float32(L_pos.x),np.float32(L_pos.y),np.float32(L_pos.z)]) + d=_a_-_b_ + return d.dot(d*0.5)/constants.c.value + +#--------------------------------------------------DISTANCE FROM LOCATION------------------------------------------- + + def light_time_delay_from_location(self,ref_time,other_location): #similar to time_delay_from_location from pycbc.detector + L_pos=LISA(ref_time,self.kappa,self._lambda_).get_pos_detector() # UNIT AND Coordinate system and time + _b_=np.array([np.float32(L_pos.x),np.float32(L_pos.y),np.float32(L_pos.z)]) + if isinstance(other_location,np.ndarray): + _a_=other_location + elif 
isinstance(other_location,coordinates.SkyCoord): + _a_=np.array([np.float32(other_location.x),np.float32(other_location.y),np.float32(other_location.z)]) + d=_a_-_b_ + return d.dot(d*0.5)/constants.c.value From 4c9716c73318ea04daaaa36570f1398f903e0007 Mon Sep 17 00:00:00 2001 From: SSastro00 <66954019+SSastro00@users.noreply.github.com> Date: Thu, 18 Jun 2020 07:52:23 +0530 Subject: [PATCH 15/68] Update LISA_detector.py --- pycbc/LISA_detector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycbc/LISA_detector.py b/pycbc/LISA_detector.py index ed23ae96007..e089cb100bb 100644 --- a/pycbc/LISA_detector.py +++ b/pycbc/LISA_detector.py @@ -1,5 +1,5 @@ -from numpy import sin,cos,pi -from astropy import coordinates,constants +from numpy import sin, cos, pi +from astropy import coordinates, constants from astropy import units as u from astropy.time import Time import numpy as np From dc75feaa59349e849faba82fffcf36fb06851cc4 Mon Sep 17 00:00:00 2001 From: SSastro00 <66954019+SSastro00@users.noreply.github.com> Date: Thu, 18 Jun 2020 10:39:22 +0530 Subject: [PATCH 16/68] Changes by me --- pycbc/detector.py | 112 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 15 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index a95e4a1aeaf..63847f6c680 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -28,15 +28,15 @@ """This module provides utilities for calculating detector responses and timing between observatories. """ +import lal import lalsimulation import numpy as np -import lal from pycbc.types import TimeSeries from astropy.time import Time -from astropy import constants +from astropy import constants, coordinates from astropy.units.si import sday -from numpy import cos, sin - +from numpy import cos, sin, pi +from astropy import units as u # Response functions are modelled after those in lalsuite and as also # presented in https://arxiv.org/pdf/gr-qc/0008066.pdf @@ -82,17 +82,81 @@ def __init__(self, detector_name, reference_time=1126259462.0): using a slower but higher precision method. """ - self.name = str(detector_name) - self.frDetector = lalsimulation.DetectorPrefixToLALDetector(self.name) - self.response = self.frDetector.response - self.location = self.frDetector.location - self.latitude = self.frDetector.frDetector.vertexLatitudeRadians - self.longitude = self.frDetector.frDetector.vertexLongitudeRadians - self.reference_time = reference_time self.sday = None self.gmst_reference = None - + self.name = str(detector_name) + + if self.name is 'LISA': + t=Time(val=self.reference_time, format='gps', scale='utc').to_datetime(timezone=None) + t=np.sum(np.array( + [t.year - 2034, t.month/12, t.day/(12 * 365), + t.hour/(12 * 365 * 24), t.minute/(12 * 365 * 24 * 60), + t.second/(12 * 365 * 24 * 60 * 60), + t.microsecond/(12 * 365 * 24 * 60 * 60 * 1e-6)] + ), axis=0) + + n=np.array(range(1, 4)) + kappa, _lambda_ = 0, 0 + alpha=2. * np.pi * t_ref/1 + kappa + beta_n=(n - 1) + (2. * np.pi/3) + _lambda_ + a, L = 1., .1 # units are in AU + e = L/(2. 
* a * np.sqrt(3)) + + """ 3 x 3 array (0,0)-> x coord for 1st detector, + (0,1)-> x coord for 1st detector,""" + + self.location = np.array( + [a*cos(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + sin(alpha)**2)*cos(beta_n)), + a*sin(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + cos(alpha)**2)*sin(beta_n)), + -np.sqrt(3)*a*e*cos(alpha - beta_n)]).transpose() + + else: + + self.frDetector = lalsimulation.DetectorPrefixToLALDetector(self.name) + self.response = self.frDetector.response + self.location = self.frDetector.location + self.latitude = self.frDetector.frDetector.vertexLatitudeRadians + self.longitude = self.frDetector.frDetector.vertexLongitudeRadians + + def plot_LISA_orbit(self): + + """ Plots the LISA orbit for 1 year along with Sun and Earth's position""" + + from mpl_toolkits.mplot3d import Axes3D + import matplotlib.pyplot as plt + t = np.arange(0, 1, .01) + n = np.array(range(1, 4)) + beta_n=(n - 1) + 2. * pi/3 + a, L = 1., .1 + e = L/(2. * a * np.sqrt(3)) + orbit_1=np.array( + [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[0]) - (1 + sin(2.*pi*t)**2)*cos(beta_n[0])), + a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[0]) - (1 + cos(2.*pi*t)**2)*sin(beta_n[0])), + -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[0])]) + + orbit_2=np.array( + [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[1]) - (1 + sin(2.*pi*t)**2)*cos(beta_n[1])), + a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[1]) - (1 + cos(2.*pi*t)**2)*sin(beta_n[1])), + -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[1])]) + + orbit_3=np.array( + [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[2])-(1 + sin(2.*pi*t)**2)*cos(beta_n[2])), + a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[2])-(1 + cos(2.*pi*t)**2)*sin(beta_n[2])), + -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[2])]) + + t = Time(val=self.reference_time, format='gps', scale='utc') + earth=coordinates.get_body('earth', t, location=None).transform_to('icrs') + sun.representation_type, earth.representation_type ='cartesian', 'cartesian' + + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.scatter(np.float32(earth.x), np.float32(earth.y), np.float32(earth.z), marker=',') + ax.scatter(np.float32(sun.x), np.float32(sun.y), np.float32(sun.z), marker='h') + ax.scatter(orbit_1[0], orbit_1[1] ,orbit_1[2], marker='.') + ax.scatter(orbit_2[0], orbit_1[1] ,orbit_1[2], marker='+') + ax.scatter(orbit_3[0], orbit_1[1] ,orbit_1[2], marker='*') + def set_gmst_reference(self): if self.reference_time is not None: self.sday = float(sday.si.scale) @@ -212,15 +276,31 @@ def time_delay_from_location(self, other_location, right_ascension, float The arrival time difference between the detectors. 
""" + if self.name is 'LISA': + pos = coordinates.SkyCoord(x=other_location[0], y=other_location[1], + z=other_location[0], unit=u.AU, + frame='gcrs').transform_to('icrs') + + """signal = coordinates.SkyCoord(ra = right_ascension, dec = declination, + unit=u.rad, frame='gcrs').transform_to('icrs') + Skipped assuming the user will enter in ICRS form only + + x = other_location[0] - self.location[:,0] + y = other_location[1] - self.location[:,1] + z = other_location[2] - self.location[:,2]""" + + dx = np.array([other_location[0] - self.location[:,0], + other_location[1] - self.location[:,1], + other_location[2] - self.location[:,2]]) + else: + dx = other_location - self.location + ra_angle = self.gmst_estimate(t_gps) - right_ascension cosd = cos(declination) - e0 = cosd * cos(ra_angle) e1 = cosd * -sin(ra_angle) e2 = sin(declination) - ehat = np.array([e0, e1, e2]) - dx = other_location - self.location return dx.dot(ehat) / constants.c.value def time_delay_from_detector(self, other_detector, right_ascension, @@ -247,6 +327,8 @@ def time_delay_from_detector(self, other_detector, right_ascension, float The arrival time difference between the detectors. """ + if self.name is 'LISA': + return self.time_delay_from_location(other_detector.location, right_ascension, declination, From 5a906288afdf0675399f16596111721d2d316979 Mon Sep 17 00:00:00 2001 From: SSastro00 <66954019+SSastro00@users.noreply.github.com> Date: Thu, 18 Jun 2020 10:41:32 +0530 Subject: [PATCH 17/68] Delete LISA_detector.py --- pycbc/LISA_detector.py | 103 ----------------------------------------- 1 file changed, 103 deletions(-) delete mode 100644 pycbc/LISA_detector.py diff --git a/pycbc/LISA_detector.py b/pycbc/LISA_detector.py deleted file mode 100644 index e089cb100bb..00000000000 --- a/pycbc/LISA_detector.py +++ /dev/null @@ -1,103 +0,0 @@ -from numpy import sin, cos, pi -from astropy import coordinates, constants -from astropy import units as u -from astropy.time import Time -import numpy as np -import lal, lalsimulation - -#-----------------------------------------------------COORDIANTE TRANSFORMATION----------------------------------------------------- -def from_icrs_to_gcrs(icrs_coord): - if isinstance(icrs_coord,np.ndarray) and gcrs_coord.shape==(3,): - x,y,z=icrs_coord - return coordinates.SkyCoord(x,y,z,unit='AU',representation_type='cartesian',frame='icrs').transform_to('gcrs') - elif icrs_coord.frame is 'icrs': - return icrs_coord.transform_to('gcrs') - else : - raise RuntimeError("1") - -def from_gcrs_to_icrs(gcrs_coord): - if isinstance(gcrs_coord,np.ndarray) and gcrs_coord.shape==(3,):#Add unit section - x,y,z=gcrs_coord - return coordinates.SkyCoord(x,y,z,unit='AU',representation_type='cartesian',frame='gcrs').transform_to('icrs') - elif gcrs_coord.frame is 'gcrs': - return gcrs_coord.transform_to('icrs') - else : - raise RuntimeError("1") - -class LISA(object): - def __init__(self,t_gps,kappa,_lambda_): - self.t_gps=t_gps - self.kappa=kappa - self._lambda_=_lambda_ -#-----------------------------------------------------DETECTOR POSITION----------------------------------------------------- - def get_pos_detector(self,plot=False): - t=Time(val=self.t_gps,format='gps',scale='utc').to_datetime(timezone=None) - t_ref = np.array([2034-t.year,t.month/12,t.day/(12*365),t.hour/(12*365*24),t.minute/(12*365*24*60),t.second/(12*365*24*60*60),t.microsecond/(12*365*24*60*60*1e-6)]) - t_ref = np.sum(t_ref,axis=0) - n=np.array(range(1,4)) - alpha=2.*pi*t_ref/1+self.kappa - beta_n=(n-1)+2.*pi/3+self._lambda_ - a, L = 1.,.1 
#*u.AU - e = L/(2.*a*np.sqrt(3)) - - # pos[0],pos[1],pos[2] = X, Y, Z for all 3 detectors at one time - - pos = np.array([a*cos(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n)-(1 + sin(alpha)**2)*cos(beta_n)), - a*sin(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n)-(1 + cos(alpha)**2)*sin(beta_n)), - -np.sqrt(3)*a*e*cos(alpha - beta_n)]) - - if plot: - - from mpl_toolkits.mplot3d import Axes3D - import matplotlib.pyplot as plt - - #ax.scatter(pos[0],pos[1],pos[2], marker='o')# X,Y,Z at current time - t=np.arange(0,10,.1) - x_1=a*cos(t) + a*e*(sin(t)*cos(t)*sin(beta_n[0])-(1 + sin(t)**2)*cos(beta_n[0])) - y_1=a*sin(t) + a*e*(sin(t)*cos(t)*sin(beta_n[0])-(1 + cos(t)**2)*sin(beta_n[0])) - z_1=-np.sqrt(3)*a*e*cos(t - beta_n[0]) - x_2=a*cos(t) + a*e*(sin(t)*cos(t)*sin(beta_n[1])-(1 + sin(t)**2)*cos(beta_n[1])) - y_2=a*sin(t) + a*e*(sin(t)*cos(t)*sin(beta_n[1])-(1 + cos(t)**2)*sin(beta_n[1])) - z_2=-np.sqrt(3)*a*e*cos(t - beta_n[1]) - x_3=a*cos(t) + a*e*(sin(t)*cos(t)*sin(beta_n[2])-(1 + sin(t)**2)*cos(beta_n[2])) - y_3=a*sin(t) + a*e*(sin(t)*cos(t)*sin(beta_n[2])-(1 + cos(t)**2)*sin(beta_n[2])) - z_3=-np.sqrt(3)*a*e*cos(t - beta_n[2]) - rand_pt=from_gcrs_to_icrs(np.zeros(3)) - t=Time(val=self.t_gps,format='gps',scale='utc') - sun=coordinates.get_sun(t).transform_to(frame='icrs') - sun.representation_type, rand_pt.representation_type='cartesian', 'cartesian' - fig = plt.figure() - ax = fig.add_subplot(111, projection='3d') - ax.scatter(np.float32(rand_pt.x),np.float32(rand_pt.y),np.float32(rand_pt.z),marker=',') - ax.scatter(np.float32(sun.x),np.float32(sun.y),np.float32(sun.z), marker='h') - ax.scatter(x_1,y_1,z_1, marker='o') - ax.scatter(x_2,y_2,z_2, marker='+') - ax.scatter(x_3,y_3,z_3, marker='*') - - coord_ICRS=coordinates.SkyCoord(pos[0],pos[1],pos[2],unit=u.AU,representation_type='cartesian',frame='icrs') - return coord_ICRS - #return np.array([x,y,z]) - -#--------------------------------------------------DISTANCE FROM DETECTOR------------------------------------------- - - def light_travel_time_to_detector(self,det,ref_time): - if isinstance(det,str): #if ref_time is None: - det_loc=from_gcrs_to_icrs(lalsimulation.DetectorPrefixToLALDetector('H1').location*6.6846e-12) - det_loc.representation_type='cartesian' - _a_=np.array([np.float32(det_loc.x),np.float32(det_loc.y),np.float32(det_loc.z)]) - L_pos=LISA(ref_time,self.kappa,self._lambda_).get_pos_detector() - _b_=np.array([np.float32(L_pos.x),np.float32(L_pos.y),np.float32(L_pos.z)]) - d=_a_-_b_ - return d.dot(d*0.5)/constants.c.value - -#--------------------------------------------------DISTANCE FROM LOCATION------------------------------------------- - - def light_time_delay_from_location(self,ref_time,other_location): #similar to time_delay_from_location from pycbc.detector - L_pos=LISA(ref_time,self.kappa,self._lambda_).get_pos_detector() # UNIT AND Coordinate system and time - _b_=np.array([np.float32(L_pos.x),np.float32(L_pos.y),np.float32(L_pos.z)]) - if isinstance(other_location,np.ndarray): - _a_=other_location - elif isinstance(other_location,coordinates.SkyCoord): - _a_=np.array([np.float32(other_location.x),np.float32(other_location.y),np.float32(other_location.z)]) - d=_a_-_b_ - return d.dot(d*0.5)/constants.c.value From 68d7e1eeb901930bd437bf992a70023e298955a4 Mon Sep 17 00:00:00 2001 From: SSastro00 <66954019+SSastro00@users.noreply.github.com> Date: Thu, 18 Jun 2020 10:45:56 +0530 Subject: [PATCH 18/68] Update detector.py --- pycbc/detector.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pycbc/detector.py 
b/pycbc/detector.py index 63847f6c680..68cf050e068 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -328,6 +328,14 @@ def time_delay_from_detector(self, other_detector, right_ascension, The arrival time difference between the detectors. """ if self.name is 'LISA': + other_location.location=coordinates.SkyCoord( + other_detector.location[0], other_detector.location[1], + other_detector.location[2], frame = 'gcrs').transform_to('icrs') + + return self.time_delay_from_location(other_detector.location, + right_ascension, + declination, + t_gps) return self.time_delay_from_location(other_detector.location, right_ascension, From e32c9d8e82be6d753f1d2ad032eb40437f601064 Mon Sep 17 00:00:00 2001 From: SSastro-GW <66710757+SSastro-GW@users.noreply.github.com> Date: Thu, 18 Jun 2020 10:51:25 +0530 Subject: [PATCH 19/68] Update detector.py --- pycbc/detector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index 68cf050e068..a51c27e8dc0 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -28,9 +28,9 @@ """This module provides utilities for calculating detector responses and timing between observatories. """ -import lal import lalsimulation import numpy as np +import lal from pycbc.types import TimeSeries from astropy.time import Time from astropy import constants, coordinates From 0da01f16a2dca3c3d4316ad19ab0a1a13162641f Mon Sep 17 00:00:00 2001 From: SSastro-GW <66710757+SSastro-GW@users.noreply.github.com> Date: Thu, 18 Jun 2020 11:47:25 +0530 Subject: [PATCH 20/68] detector.py change_1: 18/6 --- pycbc/detector.py | 49 ++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index a51c27e8dc0..e3722e232ed 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -36,7 +36,7 @@ from astropy import constants, coordinates from astropy.units.si import sday from numpy import cos, sin, pi -from astropy import units as u +from astropy import units # Response functions are modelled after those in lalsuite and as also # presented in https://arxiv.org/pdf/gr-qc/0008066.pdf @@ -86,43 +86,44 @@ def __init__(self, detector_name, reference_time=1126259462.0): self.sday = None self.gmst_reference = None self.name = str(detector_name) - + if self.name is 'LISA': - t=Time(val=self.reference_time, format='gps', scale='utc').to_datetime(timezone=None) - t=np.sum(np.array( - [t.year - 2034, t.month/12, t.day/(12 * 365), - t.hour/(12 * 365 * 24), t.minute/(12 * 365 * 24 * 60), - t.second/(12 * 365 * 24 * 60 * 60), - t.microsecond/(12 * 365 * 24 * 60 * 60 * 1e-6)] - ), axis=0) - - n=np.array(range(1, 4)) + t = Time(val =s elf.reference_time, format = 'gps', + scale = 'utc').to_datetime(timezone=None) + t = np.sum(np.array([t.year - 2034, t.month/12, t.day/(12*365), + t.hour/(12*365*24), + t.minute/(12*365*24*60), + t.second/(12*365*24*60*60), + t.microsecond/(12*365*24*60*60*1e-6)]), axis=0) + + n = np.array(range(1, 4)) kappa, _lambda_ = 0, 0 - alpha=2. * np.pi * t_ref/1 + kappa - beta_n=(n - 1) + (2. * np.pi/3) + _lambda_ + alpha = 2. * np.pi * t_ref/1 + kappa + beta_n = (n - 1) + (2. * np.pi/3) + _lambda_ a, L = 1., .1 # units are in AU e = L/(2. 
* a * np.sqrt(3)) - + """ 3 x 3 array (0,0)-> x coord for 1st detector, (0,1)-> x coord for 1st detector,""" - + self.location = np.array( [a*cos(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + sin(alpha)**2)*cos(beta_n)), a*sin(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + cos(alpha)**2)*sin(beta_n)), -np.sqrt(3)*a*e*cos(alpha - beta_n)]).transpose() - + else: - - self.frDetector = lalsimulation.DetectorPrefixToLALDetector(self.name) + + self.frDetector = lalsimulation.DetectorPrefixToLALDetector( + self.name) self.response = self.frDetector.response self.location = self.frDetector.location self.latitude = self.frDetector.frDetector.vertexLatitudeRadians self.longitude = self.frDetector.frDetector.vertexLongitudeRadians def plot_LISA_orbit(self): - - """ Plots the LISA orbit for 1 year along with Sun and Earth's position""" - + + """ Plots the LISA orbit for 1 year along with Sun and Earth's position""" + from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt t = np.arange(0, 1, .01) @@ -134,12 +135,12 @@ def plot_LISA_orbit(self): [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[0]) - (1 + sin(2.*pi*t)**2)*cos(beta_n[0])), a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[0]) - (1 + cos(2.*pi*t)**2)*sin(beta_n[0])), -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[0])]) - + orbit_2=np.array( [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[1]) - (1 + sin(2.*pi*t)**2)*cos(beta_n[1])), a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[1]) - (1 + cos(2.*pi*t)**2)*sin(beta_n[1])), -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[1])]) - + orbit_3=np.array( [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[2])-(1 + sin(2.*pi*t)**2)*cos(beta_n[2])), a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[2])-(1 + cos(2.*pi*t)**2)*sin(beta_n[2])), @@ -148,7 +149,7 @@ def plot_LISA_orbit(self): t = Time(val=self.reference_time, format='gps', scale='utc') earth=coordinates.get_body('earth', t, location=None).transform_to('icrs') sun.representation_type, earth.representation_type ='cartesian', 'cartesian' - + fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.scatter(np.float32(earth.x), np.float32(earth.y), np.float32(earth.z), marker=',') From 96a0a4a86bf9bdfcdf87302aa9c190980210ef91 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Fri, 19 Jun 2020 09:14:34 +0530 Subject: [PATCH 21/68] detector.py commit_1 19/6 added a class of LISA to detector.py --- pycbc/detector.py | 226 +++++++++++++++++++++------------------------- 1 file changed, 104 insertions(+), 122 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index e3722e232ed..a702c2fe851 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -33,10 +33,10 @@ import lal from pycbc.types import TimeSeries from astropy.time import Time -from astropy import constants, coordinates +from astropy import constants from astropy.units.si import sday -from numpy import cos, sin, pi -from astropy import units +from numpy import cos, sin + # Response functions are modelled after those in lalsuite and as also # presented in https://arxiv.org/pdf/gr-qc/0008066.pdf @@ -48,7 +48,6 @@ def gmst_accurate(gps_time): def get_available_detectors(): """Return list of detectors known in the currently sourced lalsuite. - This function will query lalsuite about which detectors are known to lalsuite. Detectors are identified by a two character string e.g. 'K1', but also by a longer, and clearer name, e.g. KAGRA. 
This function returns @@ -70,7 +69,6 @@ class Detector(object): """ def __init__(self, detector_name, reference_time=1126259462.0): """ Create class representing a gravitational-wave detector - Parameters ---------- detector_name: str @@ -80,84 +78,18 @@ def __init__(self, detector_name, reference_time=1126259462.0): will be estimated from a reference time. If 'None', we will calculate the time for each gps time requested explicitly using a slower but higher precision method. - """ + self.name = str(detector_name) + self.frDetector = lalsimulation.DetectorPrefixToLALDetector(self.name) + self.response = self.frDetector.response + self.location = self.frDetector.location + self.latitude = self.frDetector.frDetector.vertexLatitudeRadians + self.longitude = self.frDetector.frDetector.vertexLongitudeRadians + self.reference_time = reference_time self.sday = None self.gmst_reference = None - self.name = str(detector_name) - - if self.name is 'LISA': - t = Time(val =s elf.reference_time, format = 'gps', - scale = 'utc').to_datetime(timezone=None) - t = np.sum(np.array([t.year - 2034, t.month/12, t.day/(12*365), - t.hour/(12*365*24), - t.minute/(12*365*24*60), - t.second/(12*365*24*60*60), - t.microsecond/(12*365*24*60*60*1e-6)]), axis=0) - - n = np.array(range(1, 4)) - kappa, _lambda_ = 0, 0 - alpha = 2. * np.pi * t_ref/1 + kappa - beta_n = (n - 1) + (2. * np.pi/3) + _lambda_ - a, L = 1., .1 # units are in AU - e = L/(2. * a * np.sqrt(3)) - - """ 3 x 3 array (0,0)-> x coord for 1st detector, - (0,1)-> x coord for 1st detector,""" - - self.location = np.array( - [a*cos(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + sin(alpha)**2)*cos(beta_n)), - a*sin(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + cos(alpha)**2)*sin(beta_n)), - -np.sqrt(3)*a*e*cos(alpha - beta_n)]).transpose() - - else: - - self.frDetector = lalsimulation.DetectorPrefixToLALDetector( - self.name) - self.response = self.frDetector.response - self.location = self.frDetector.location - self.latitude = self.frDetector.frDetector.vertexLatitudeRadians - self.longitude = self.frDetector.frDetector.vertexLongitudeRadians - - def plot_LISA_orbit(self): - - """ Plots the LISA orbit for 1 year along with Sun and Earth's position""" - - from mpl_toolkits.mplot3d import Axes3D - import matplotlib.pyplot as plt - t = np.arange(0, 1, .01) - n = np.array(range(1, 4)) - beta_n=(n - 1) + 2. * pi/3 - a, L = 1., .1 - e = L/(2. 
* a * np.sqrt(3)) - orbit_1=np.array( - [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[0]) - (1 + sin(2.*pi*t)**2)*cos(beta_n[0])), - a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[0]) - (1 + cos(2.*pi*t)**2)*sin(beta_n[0])), - -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[0])]) - - orbit_2=np.array( - [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[1]) - (1 + sin(2.*pi*t)**2)*cos(beta_n[1])), - a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[1]) - (1 + cos(2.*pi*t)**2)*sin(beta_n[1])), - -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[1])]) - - orbit_3=np.array( - [a*cos(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[2])-(1 + sin(2.*pi*t)**2)*cos(beta_n[2])), - a*sin(2.*pi*t) + a*e*(sin(2.*pi*t)*cos(2.*pi*t)*sin(beta_n[2])-(1 + cos(2.*pi*t)**2)*sin(beta_n[2])), - -np.sqrt(3)*a*e*cos(2.*pi*t - beta_n[2])]) - - t = Time(val=self.reference_time, format='gps', scale='utc') - earth=coordinates.get_body('earth', t, location=None).transform_to('icrs') - sun.representation_type, earth.representation_type ='cartesian', 'cartesian' - fig = plt.figure() - ax = fig.add_subplot(111, projection='3d') - ax.scatter(np.float32(earth.x), np.float32(earth.y), np.float32(earth.z), marker=',') - ax.scatter(np.float32(sun.x), np.float32(sun.y), np.float32(sun.z), marker='h') - ax.scatter(orbit_1[0], orbit_1[1] ,orbit_1[2], marker='.') - ax.scatter(orbit_2[0], orbit_1[1] ,orbit_1[2], marker='+') - ax.scatter(orbit_3[0], orbit_1[1] ,orbit_1[2], marker='*') - def set_gmst_reference(self): if self.reference_time is not None: self.sday = float(sday.si.scale) @@ -178,12 +110,10 @@ def gmst_estimate(self, gps_time): def light_travel_time_to_detector(self, det): """ Return the light travel time from this detector - Parameters ---------- det: Detector The other detector to determine the light travel time to. - Returns ------- time: float @@ -194,7 +124,6 @@ def light_travel_time_to_detector(self, det): def antenna_pattern(self, right_ascension, declination, polarization, t_gps): """Return the detector response. - Parameters ---------- right_ascension: float or numpy.ndarray @@ -203,7 +132,6 @@ def antenna_pattern(self, right_ascension, declination, polarization, t_gps): The declination of the source polarization: float or numpy.ndarray The polarization angle of the source - Returns ------- fplus: float or numpy.ndarray @@ -256,11 +184,9 @@ def time_delay_from_location(self, other_location, right_ascension, declination, t_gps): """Return the time delay from the given location to detector for a signal with the given sky location - In other words return `t1 - t2` where `t1` is the arrival time in this detector and `t2` is the arrival time in the other location. - Parameters ---------- other_location : numpy.ndarray of coordinates @@ -271,37 +197,20 @@ def time_delay_from_location(self, other_location, right_ascension, The declination (in rad) of the signal. t_gps : float The GPS time (in s) of the signal. - Returns ------- float The arrival time difference between the detectors. 
""" - if self.name is 'LISA': - pos = coordinates.SkyCoord(x=other_location[0], y=other_location[1], - z=other_location[0], unit=u.AU, - frame='gcrs').transform_to('icrs') - - """signal = coordinates.SkyCoord(ra = right_ascension, dec = declination, - unit=u.rad, frame='gcrs').transform_to('icrs') - Skipped assuming the user will enter in ICRS form only - - x = other_location[0] - self.location[:,0] - y = other_location[1] - self.location[:,1] - z = other_location[2] - self.location[:,2]""" - - dx = np.array([other_location[0] - self.location[:,0], - other_location[1] - self.location[:,1], - other_location[2] - self.location[:,2]]) - else: - dx = other_location - self.location - ra_angle = self.gmst_estimate(t_gps) - right_ascension cosd = cos(declination) + e0 = cosd * cos(ra_angle) e1 = cosd * -sin(ra_angle) e2 = sin(declination) + ehat = np.array([e0, e1, e2]) + dx = other_location - self.location return dx.dot(ehat) / constants.c.value def time_delay_from_detector(self, other_detector, right_ascension, @@ -311,7 +220,6 @@ def time_delay_from_detector(self, other_detector, right_ascension, arrival time in this detector and `t2` is the arrival time in the other detector. Note that this would return the same value as `time_delay_from_earth_center` if `other_detector` was geocentric. - Parameters ---------- other_detector : detector.Detector @@ -322,22 +230,11 @@ def time_delay_from_detector(self, other_detector, right_ascension, The declination (in rad) of the signal. t_gps : float The GPS time (in s) of the signal. - Returns ------- float The arrival time difference between the detectors. """ - if self.name is 'LISA': - other_location.location=coordinates.SkyCoord( - other_detector.location[0], other_detector.location[1], - other_detector.location[2], frame = 'gcrs').transform_to('icrs') - - return self.time_delay_from_location(other_detector.location, - right_ascension, - declination, - t_gps) - return self.time_delay_from_location(other_detector.location, right_ascension, declination, @@ -345,11 +242,9 @@ def time_delay_from_detector(self, other_detector, right_ascension, def project_wave(self, hp, hc, longitude, latitude, polarization): """Return the strain of a waveform as measured by the detector. - Apply the time shift for the given detector relative to the assumed geocentric frame and apply the antenna patterns to the plus and cross polarizations. - """ h_lal = lalsimulation.SimDetectorStrainREAL8TimeSeries( hp.astype(np.float64).lal(), hc.astype(np.float64).lal(), @@ -361,12 +256,10 @@ def project_wave(self, hp, hc, longitude, latitude, polarization): def optimal_orientation(self, t_gps): """Return the optimal orientation in right ascension and declination for a given GPS time. - Parameters ---------- t_gps: float Time in gps seconds - Returns ------- ra: float @@ -385,13 +278,11 @@ def overhead_antenna_pattern(right_ascension, declination, polarization): to the normal to the detector plane (i.e. overhead and underneath) while the point with zero right ascension and declination is the direction of one of the interferometer arms. 
- Parameters ---------- right_ascension: float declination: float polarization: float - Returns ------- f_plus: float @@ -413,3 +304,94 @@ def overhead_antenna_pattern(right_ascension, declination, polarization): def effective_distance(distance, inclination, f_plus, f_cross): return distance / np.sqrt( ( 1 + np.cos( inclination )**2 )**2 / 4 * f_plus**2 + np.cos( inclination )**2 * f_cross**2 ) +""" LISA class """ + + +class LISA(object): + def __init__(self,kappa,_lambda_,reference_time=1126259462.0): + self.reference_time=reference_time + self.kappa=kappa + self._lambda_=_lambda_ + + def get_pos(self, t_gps): + if t_gps is None: + t_gps = Time(val = self.reference_time, format = 'gps', + scale = 'utc').to_datetime(timezone = None) + elif isinstance(t_gps, np.ScalarType): + t_gps = Time(val = t_gps, format = 'gps', + scale = 'utc').to_datetime(timezone = None) + + t_gps = np.sum(np.array([t_gps.year - 2034, t_gps.month/12, t_gps.day/(12*365), + t_gps.hour/(12*365*24), + t_gps.minute/(12*365*24*60), + t_gps.second/(12*365*24*60*60), + t_gps.microsecond/(12*365*24*60*60*1e-6)]), axis=0) + + n = np.array(range(1, 4)) + kappa, _lambda_ = 0, 0 + alpha = 2. * np.pi * t_gps/1 + kappa + beta_n = (n - 1) + (2. * np.pi/3) + _lambda_ + a, L = 1., .1 # units are in AU + e = L/(2. * a * np.sqrt(3)) + + x = a*cos(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + sin(alpha)**2)*cos(beta_n)) + y = a*sin(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + cos(alpha)**2)*sin(beta_n)) + z = -np.sqrt(3)*a*e*cos(alpha - beta_n) + self.location = np.array([x,y,z]) + + return self.location + + def plot_orbit(self): + from mpl_toolkits.mplot3d import Axes3D + import matplotlib.pyplot as plt + dec_center = [] + for i in range(2000): + dec_center.append(self.get_pos(self.reference_time + i).mean(axis = 1)) + """ values don't change much with GPS time""" + t = Time(val = self.reference_time, format = 'gps', scale ='utc') + sun = coordinates.get_sun(t).transform_to('icrs') + earth = coordinates.get_body('earth', t, location = None).transform_to('icrs') + sun.representation_type, earth.representation_type ='cartesian', 'cartesian' + + fig = plt.figure() + ax = plt.axes(projection = "3d") + ax.scatter(np.float32(earth.x), np.float32(earth.y), np.float32(earth.z), marker = 'o') + ax.scatter(np.float32(sun.x), np.float32(sun.y), np.float32(sun.z), marker = '+') + ax.scatter(dec_center[0], dec_center[1] ,dec_center[2], marker = '*') + ax.set_xlabel('X axis (AU)') + ax.set_ylabel('Y axis (AU)') + ax.set_zlabel('Z axis (AU)') + + def time_delay_from_location(self, other_location, right_ascension, + declination, t_gps): + dec_loc = self.get_pos(t_gps) + """signal = coordinates.SkyCoord(ra = right_ascension, dec = declination, + unit = u.rad, frame = 'gcrs').transform_to('icrs')""" + + dx = np.array([other_location[0] - self.location[0], + other_location[1] - self.location[1], + other_location[2] - self.location[2]]) + + """ra_angle = self.gmst_estimate(t_gps) - right_ascension""" + cosd = cos(declination) + e0 = cosd * cos(right_ascension) + e1 = cosd * -sin(right_ascension) + e2 = sin(declination) + ehat = np.array([e0, e1, e2]) + return dx.dot(ehat) / constants.c.value + + def time_delay_from_detector(self, other_detector, right_ascension, + declination, t_gps): + return self.time_delay_from_location(other_detector.location, + right_ascension, + declination, + t_gps) + def time_delay_from_earth_center(self, right_ascension, declination, t_gps): + if t_gps is None: + t_gps = Time(val = self.reference_time, format = 
'gps', scale ='utc') + else: + t_gps = Time(val = t_gps, format = 'gps', scale ='utc') + earth = coordinates.get_body('earth', t, location = None).transform_to('icrs') + return self.time_delay_from_location( + np.array([np.float32(earth.x), np.float32(earth.y), np.float32(earth.z)]), + right_ascension, declination, t_gps) From 79ff9246f1c918b3d96b37c7a97d48ee22abfdd0 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Fri, 19 Jun 2020 20:57:13 +0530 Subject: [PATCH 22/68] change_2 : 19/6 issues by code climate --- pycbc/detector.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index a702c2fe851..77c4cdfa736 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -304,38 +304,39 @@ def overhead_antenna_pattern(right_ascension, declination, polarization): def effective_distance(distance, inclination, f_plus, f_cross): return distance / np.sqrt( ( 1 + np.cos( inclination )**2 )**2 / 4 * f_plus**2 + np.cos( inclination )**2 * f_cross**2 ) -""" LISA class """ - + """ LISA class """ class LISA(object): - def __init__(self,kappa,_lambda_,reference_time=1126259462.0): - self.reference_time=reference_time - self.kappa=kappa - self._lambda_=_lambda_ + def __init__(self, kappa, _lambda_, reference_time = 1126259462.0): + self.reference_time = reference_time + self.kappa = kappa + self._lambda_ = _lambda_ def get_pos(self, t_gps): if t_gps is None: t_gps = Time(val = self.reference_time, format = 'gps', - scale = 'utc').to_datetime(timezone = None) + scale = 'utc').to_datetime(timezone = None) elif isinstance(t_gps, np.ScalarType): t_gps = Time(val = t_gps, format = 'gps', - scale = 'utc').to_datetime(timezone = None) - - t_gps = np.sum(np.array([t_gps.year - 2034, t_gps.month/12, t_gps.day/(12*365), - t_gps.hour/(12*365*24), - t_gps.minute/(12*365*24*60), - t_gps.second/(12*365*24*60*60), - t_gps.microsecond/(12*365*24*60*60*1e-6)]), axis=0) - + scale = 'utc').to_datetime(timezone = None) + + t_gps = np.sum(np.array([t_gps.year - 2034, t_gps.month/12, + t_gps.day/(4380), + t_gps.hour/(105120), + t_gps.minute/(6307200), + t_gps.second/(37843200), + t_gps.microsecond/(37843200*1e-6)]), axis=0) + n = np.array(range(1, 4)) kappa, _lambda_ = 0, 0 alpha = 2. * np.pi * t_gps/1 + kappa beta_n = (n - 1) + (2. * np.pi/3) + _lambda_ a, L = 1., .1 # units are in AU e = L/(2. 
* a * np.sqrt(3)) + _prod_ = a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - x = a*cos(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + sin(alpha)**2)*cos(beta_n)) - y = a*sin(alpha) + a*e*(sin(alpha)*cos(alpha)*sin(beta_n) - (1 + cos(alpha)**2)*sin(beta_n)) + x = a*cos(alpha) + _prod_ - (1 + sin(alpha)**2)*cos(beta_n)) + y = a*sin(alpha) + _prod_ - (1 + cos(alpha)**2)*sin(beta_n)) z = -np.sqrt(3)*a*e*cos(alpha - beta_n) self.location = np.array([x,y,z]) @@ -347,7 +348,7 @@ def plot_orbit(self): dec_center = [] for i in range(2000): dec_center.append(self.get_pos(self.reference_time + i).mean(axis = 1)) - """ values don't change much with GPS time""" + t = Time(val = self.reference_time, format = 'gps', scale ='utc') sun = coordinates.get_sun(t).transform_to('icrs') earth = coordinates.get_body('earth', t, location = None).transform_to('icrs') @@ -386,6 +387,7 @@ def time_delay_from_detector(self, other_detector, right_ascension, right_ascension, declination, t_gps) + def time_delay_from_earth_center(self, right_ascension, declination, t_gps): if t_gps is None: t_gps = Time(val = self.reference_time, format = 'gps', scale ='utc') From 5c58909b71c5bdca8d6fc6b6038eb21c02f85179 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Tue, 23 Jun 2020 16:51:15 +0530 Subject: [PATCH 23/68] commit_1: 23/6 Removal of plotting method Addition of transformation method from Ground Based Detectors (GBD) to LISA --- pycbc/detector.py | 95 +++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 65 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index 77c4cdfa736..3b9c77adaf6 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -33,7 +33,7 @@ import lal from pycbc.types import TimeSeries from astropy.time import Time -from astropy import constants +from astropy import constants, coordinates, units from astropy.units.si import sday from numpy import cos, sin @@ -304,76 +304,45 @@ def overhead_antenna_pattern(right_ascension, declination, polarization): def effective_distance(distance, inclination, f_plus, f_cross): return distance / np.sqrt( ( 1 + np.cos( inclination )**2 )**2 / 4 * f_plus**2 + np.cos( inclination )**2 * f_cross**2 ) - """ LISA class """ +def dist_lisa_gbd(det, ref_time): + L_pos = LISA().get_pos(ref_time) + if isinstance(det,str): + det = Detector(det, ref_time).location + det = (det*units.m / units.AU).decompose() + if isinstance(det, np.ndarray) and det.shape[0]==3: + det = det + dist = np.array([det[0] - L_pos[0], + det[1] - L_pos[1], + det[2] - L_pos[2]]) + return dist + +""" LISA class """ class LISA(object): - def __init__(self, kappa, _lambda_, reference_time = 1126259462.0): - self.reference_time = reference_time - self.kappa = kappa - self._lambda_ = _lambda_ + def __init__(self): + None def get_pos(self, t_gps): - if t_gps is None: - t_gps = Time(val = self.reference_time, format = 'gps', - scale = 'utc').to_datetime(timezone = None) - elif isinstance(t_gps, np.ScalarType): - t_gps = Time(val = t_gps, format = 'gps', - scale = 'utc').to_datetime(timezone = None) - - t_gps = np.sum(np.array([t_gps.year - 2034, t_gps.month/12, - t_gps.day/(4380), - t_gps.hour/(105120), - t_gps.minute/(6307200), - t_gps.second/(37843200), - t_gps.microsecond/(37843200*1e-6)]), axis=0) + t_gps = Time(val=t_gps, format='gps', scale='utc').jyear n = np.array(range(1, 4)) kappa, _lambda_ = 0, 0 alpha = 2. * np.pi * t_gps/1 + kappa beta_n = (n - 1) + (2. 
* np.pi/3) + _lambda_ - a, L = 1., .1 # units are in AU + a, L = 1., .1 e = L/(2. * a * np.sqrt(3)) - _prod_ = a*e*(sin(alpha)*cos(alpha)*sin(beta_n) + prod = a*e*(sin(alpha)*cos(alpha)*sin(beta_n)) - x = a*cos(alpha) + _prod_ - (1 + sin(alpha)**2)*cos(beta_n)) - y = a*sin(alpha) + _prod_ - (1 + cos(alpha)**2)*sin(beta_n)) + x = a*cos(alpha) + prod - (1 + sin(alpha)**2)*cos(beta_n) + y = a*sin(alpha) + prod - (1 + cos(alpha)**2)*sin(beta_n) z = -np.sqrt(3)*a*e*cos(alpha - beta_n) - self.location = np.array([x,y,z]) + self.location = np.array([x, y, z]) return self.location - def plot_orbit(self): - from mpl_toolkits.mplot3d import Axes3D - import matplotlib.pyplot as plt - dec_center = [] - for i in range(2000): - dec_center.append(self.get_pos(self.reference_time + i).mean(axis = 1)) - - t = Time(val = self.reference_time, format = 'gps', scale ='utc') - sun = coordinates.get_sun(t).transform_to('icrs') - earth = coordinates.get_body('earth', t, location = None).transform_to('icrs') - sun.representation_type, earth.representation_type ='cartesian', 'cartesian' - - fig = plt.figure() - ax = plt.axes(projection = "3d") - ax.scatter(np.float32(earth.x), np.float32(earth.y), np.float32(earth.z), marker = 'o') - ax.scatter(np.float32(sun.x), np.float32(sun.y), np.float32(sun.z), marker = '+') - ax.scatter(dec_center[0], dec_center[1] ,dec_center[2], marker = '*') - ax.set_xlabel('X axis (AU)') - ax.set_ylabel('Y axis (AU)') - ax.set_zlabel('Z axis (AU)') - def time_delay_from_location(self, other_location, right_ascension, declination, t_gps): - dec_loc = self.get_pos(t_gps) - """signal = coordinates.SkyCoord(ra = right_ascension, dec = declination, - unit = u.rad, frame = 'gcrs').transform_to('icrs')""" - - dx = np.array([other_location[0] - self.location[0], - other_location[1] - self.location[1], - other_location[2] - self.location[2]]) - - """ra_angle = self.gmst_estimate(t_gps) - right_ascension""" + dx = dist_lisa_gbd(other_location, t_gps) cosd = cos(declination) e0 = cosd * cos(right_ascension) e1 = cosd * -sin(right_ascension) @@ -381,19 +350,15 @@ def time_delay_from_location(self, other_location, right_ascension, ehat = np.array([e0, e1, e2]) return dx.dot(ehat) / constants.c.value - def time_delay_from_detector(self, other_detector, right_ascension, + def time_delay_from_detector(self, det, right_ascension, declination, t_gps): - return self.time_delay_from_location(other_detector.location, - right_ascension, - declination, - t_gps) + return self.time_delay_from_location(det, right_ascension, + declination, t_gps) def time_delay_from_earth_center(self, right_ascension, declination, t_gps): - if t_gps is None: - t_gps = Time(val = self.reference_time, format = 'gps', scale ='utc') - else: - t_gps = Time(val = t_gps, format = 'gps', scale ='utc') - earth = coordinates.get_body('earth', t, location = None).transform_to('icrs') + t_gps = Time(val=t_gps, format='gps', scale='utc') + earth = coordinates.get_body('earth', t_gps, location=None).transform_to('icrs') + earth.representation_type = 'cartesian' return self.time_delay_from_location( np.array([np.float32(earth.x), np.float32(earth.y), np.float32(earth.z)]), - right_ascension, declination, t_gps) + right_ascension, declination, t_gps) From f68323f6360f0da2666763de32b9da703da242f2 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Tue, 23 Jun 2020 17:56:04 +0530 Subject: [PATCH 24/68] commit_2: 23/6 --- pycbc/detector.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) 
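The time-delay logic used throughout these LISA commits is a plane-wave projection:
the arrival-time difference between two locations is the separation vector projected
onto the unit vector pointing towards the source, divided by the speed of light. The
snippet below is a minimal, self-contained sketch of just that projection; the function
name and the bare numpy/astropy usage are illustrative choices for this example, not
part of the patch or of the PyCBC API.

    import numpy as np
    from astropy import constants

    def projected_time_delay(location_1, location_2, right_ascension, declination):
        """Arrival-time difference (s) between two positions (in metres) for a
        plane wave arriving from the given sky direction (angles in radians)."""
        dx = np.asarray(location_1) - np.asarray(location_2)
        # unit vector from the origin towards the source
        ehat = np.array([np.cos(declination) * np.cos(right_ascension),
                         np.cos(declination) * -np.sin(right_ascension),
                         np.sin(declination)])
        return dx.dot(ehat) / constants.c.value

    # Two points one astronomical unit apart along x, with the source along +x:
    # the delay is one light-travel AU, roughly 499 seconds.
    print(projected_time_delay([constants.au.value, 0., 0.], [0., 0., 0.], 0., 0.))

Note that the ground-based ``Detector`` class first converts right ascension to an hour
angle using Greenwich mean sidereal time before applying this projection, whereas the
LISA class in these commits applies it to the barycentric coordinates directly.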
diff --git a/pycbc/detector.py b/pycbc/detector.py index 3b9c77adaf6..8681ebc0527 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -306,18 +306,20 @@ def effective_distance(distance, inclination, f_plus, f_cross): def dist_lisa_gbd(det, ref_time): L_pos = LISA().get_pos(ref_time) - if isinstance(det,str): + if isinstance(det, str): det = Detector(det, ref_time).location det = (det*units.m / units.AU).decompose() - if isinstance(det, np.ndarray) and det.shape[0]==3: + if isinstance(det, np.ndarray) and det.shape[0] == 3: det = det dist = np.array([det[0] - L_pos[0], det[1] - L_pos[1], det[2] - L_pos[2]]) return dist + """ LISA class """ + class LISA(object): def __init__(self): None @@ -357,8 +359,10 @@ def time_delay_from_detector(self, det, right_ascension, def time_delay_from_earth_center(self, right_ascension, declination, t_gps): t_gps = Time(val=t_gps, format='gps', scale='utc') - earth = coordinates.get_body('earth', t_gps, location=None).transform_to('icrs') + earth = coordinates.get_body('earth', t_gps, + location=None).transform_to('icrs') earth.representation_type = 'cartesian' return self.time_delay_from_location( - np.array([np.float32(earth.x), np.float32(earth.y), np.float32(earth.z)]), - right_ascension, declination, t_gps) + np.array([np.float32(earth.x), np.float32(earth.y), + np.float32(earth.z)]), right_ascension, + declination, t_gps) From bacbc2b81c7e2879256cd5e21e940fcba27c94d3 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Wed, 24 Jun 2020 10:29:39 +0530 Subject: [PATCH 25/68] commit_1: 24/6 Changes in coordinate transfromation --- pycbc/detector.py | 49 +++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index 8681ebc0527..813bc149f4a 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -271,6 +271,16 @@ def optimal_orientation(self, t_gps): dec = self.latitude return ra, dec + def get_icrs_pos(self): + loc = self.location + loc = coordinates.SkyCoord(x=loc[0], y=loc[1], z=loc[2], unit=units.m, + frame='gcrs', representation_type='cartesian').transform_to('icrs') + loc.representation_type = 'cartesian' + conv = np.float32(((loc.x.unit/units.AU).decompose()).to_string()) + loc = np.array([np.float32(loc.x), np.float32(loc.y), + np.float32(loc.z)])*conv + return loc + def overhead_antenna_pattern(right_ascension, declination, polarization): """Return the antenna pattern factors F+ and Fx as a function of sky location and polarization angle for a hypothetical interferometer located @@ -304,18 +314,6 @@ def overhead_antenna_pattern(right_ascension, declination, polarization): def effective_distance(distance, inclination, f_plus, f_cross): return distance / np.sqrt( ( 1 + np.cos( inclination )**2 )**2 / 4 * f_plus**2 + np.cos( inclination )**2 * f_cross**2 ) -def dist_lisa_gbd(det, ref_time): - L_pos = LISA().get_pos(ref_time) - if isinstance(det, str): - det = Detector(det, ref_time).location - det = (det*units.m / units.AU).decompose() - if isinstance(det, np.ndarray) and det.shape[0] == 3: - det = det - dist = np.array([det[0] - L_pos[0], - det[1] - L_pos[1], - det[2] - L_pos[2]]) - return dist - """ LISA class """ @@ -324,12 +322,12 @@ class LISA(object): def __init__(self): None - def get_pos(self, t_gps): - t_gps = Time(val=t_gps, format='gps', scale='utc').jyear + def get_pos(self, ref_time): + ref_time = 2034 - Time(val=ref_time, format='gps', scale='utc').jyear n = np.array(range(1, 4)) kappa, _lambda_ = 0, 
0 - alpha = 2. * np.pi * t_gps/1 + kappa + alpha = 2. * np.pi * ref_time/1 + kappa beta_n = (n - 1) + (2. * np.pi/3) + _lambda_ a, L = 1., .1 e = L/(2. * a * np.sqrt(3)) @@ -342,9 +340,21 @@ def get_pos(self, t_gps): return self.location + def get_gcrs_pos(self, loc): + loc = self.location + loc = coordinates.SkyCoord(x=loc[0], y=loc[1], z=loc[2], unit=units.AU, + frame='gcrs', representation_type='cartesian').transform_to('icrs') + loc.representation_type = 'cartesian' + conv = np.float32(((loc.x.unit/units.m).decompose()).to_string()) + loc = np.array([np.float32(loc.x), np.float32(loc.y), + np.float32(loc.z)])*conv + return loc + def time_delay_from_location(self, other_location, right_ascension, declination, t_gps): - dx = dist_lisa_gbd(other_location, t_gps) + dx = np.array([self.location[0] - other_location[0], + self.location[1] - other_location[1], + self.location[2] - other_location[2]]) cosd = cos(declination) e0 = cosd * cos(right_ascension) e1 = cosd * -sin(right_ascension) @@ -354,15 +364,16 @@ def time_delay_from_location(self, other_location, right_ascension, def time_delay_from_detector(self, det, right_ascension, declination, t_gps): - return self.time_delay_from_location(det, right_ascension, + loc = Detector(det, t_gps).get_icrs_pos() + return self.time_delay_from_location(loc, right_ascension, declination, t_gps) def time_delay_from_earth_center(self, right_ascension, declination, t_gps): t_gps = Time(val=t_gps, format='gps', scale='utc') - earth = coordinates.get_body('earth', t_gps, + earth = coordinates.get_body('earth', t_gps, location=None).transform_to('icrs') earth.representation_type = 'cartesian' return self.time_delay_from_location( np.array([np.float32(earth.x), np.float32(earth.y), np.float32(earth.z)]), right_ascension, - declination, t_gps) + declination, t_gps) From bb2c24e7060c7a204c57d0c7bb6340261f502578 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Wed, 24 Jun 2020 10:40:33 +0530 Subject: [PATCH 26/68] Commit_2: 24/6 Issue for line 336, 337, 274, 374 --- pycbc/detector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index 813bc149f4a..fdfedb86397 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -274,7 +274,7 @@ def optimal_orientation(self, t_gps): def get_icrs_pos(self): loc = self.location loc = coordinates.SkyCoord(x=loc[0], y=loc[1], z=loc[2], unit=units.m, - frame='gcrs', representation_type='cartesian').transform_to('icrs') + frame='gcrs', representation_type='cartesian').transform_to('icrs') loc.representation_type = 'cartesian' conv = np.float32(((loc.x.unit/units.AU).decompose()).to_string()) loc = np.array([np.float32(loc.x), np.float32(loc.y), @@ -343,7 +343,7 @@ def get_pos(self, ref_time): def get_gcrs_pos(self, loc): loc = self.location loc = coordinates.SkyCoord(x=loc[0], y=loc[1], z=loc[2], unit=units.AU, - frame='gcrs', representation_type='cartesian').transform_to('icrs') + frame='icrs', representation_type='cartesian').transform_to('gcrs') loc.representation_type = 'cartesian' conv = np.float32(((loc.x.unit/units.m).decompose()).to_string()) loc = np.array([np.float32(loc.x), np.float32(loc.y), From 56bc7e98f912d94f17668eba246d63f57b1b9256 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Thu, 25 Jun 2020 08:33:18 +0530 Subject: [PATCH 27/68] Commit_1: 25/6 Addition of docstrings --- pycbc/detector.py | 64 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 
insertions(+) diff --git a/pycbc/detector.py b/pycbc/detector.py index fdfedb86397..81c65657b6c 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -272,6 +272,12 @@ def optimal_orientation(self, t_gps): return ra, dec def get_icrs_pos(self): + """ Transforms GCRS frame to ICRS frame + Returns + ---------- + loc: numpy.ndarray of shape (3,1) + ICRS coordinates in cartesian system + """ loc = self.location loc = coordinates.SkyCoord(x=loc[0], y=loc[1], z=loc[2], unit=units.m, frame='gcrs', representation_type='cartesian').transform_to('icrs') @@ -319,10 +325,22 @@ def effective_distance(distance, inclination, f_plus, f_cross): class LISA(object): + """For LISA detector + """ def __init__(self): None def get_pos(self, ref_time): + """Return the position of LISA detector for a given reference time + Parameters + ---------- + ref_time : numpy.ScalarType + Returns + ------- + location : numpy.ndarray of shape (3,3) + Returns the position of all 3 sattelites with each row + correspoding to a single axis. + """ ref_time = 2034 - Time(val=ref_time, format='gps', scale='utc').jyear n = np.array(range(1, 4)) @@ -341,6 +359,12 @@ def get_pos(self, ref_time): return self.location def get_gcrs_pos(self, loc): + """ Transforms ICRS frame to GCRS frame + Returns + ---------- + loc: numpy.ndarray of shape (3,3) + GCRS coordinates in cartesian system + """ loc = self.location loc = coordinates.SkyCoord(x=loc[0], y=loc[1], z=loc[2], unit=units.AU, frame='icrs', representation_type='cartesian').transform_to('gcrs') @@ -352,6 +376,25 @@ def get_gcrs_pos(self, loc): def time_delay_from_location(self, other_location, right_ascension, declination, t_gps): + """Return the time delay from the LISA detector to detector for + a signal with the given sky location. In other words return + `t1 - t2` where `t1` is the arrival time in this detector and + `t2` is the arrival time in the other location. Units(AU) + Parameters + ---------- + other_location : numpy.ndarray of coordinates in ICRS frame + A detector instance. + right_ascension : float + The right ascension (in rad) of the signal. + declination : float + The declination (in rad) of the signal. + t_gps : float + The GPS time (in s) of the signal. + Returns + ------- + numpy.ndarray + The arrival time difference between the detectors. + """ dx = np.array([self.location[0] - other_location[0], self.location[1] - other_location[1], self.location[2] - other_location[2]]) @@ -364,11 +407,32 @@ def time_delay_from_location(self, other_location, right_ascension, def time_delay_from_detector(self, det, right_ascension, declination, t_gps): + """Return the time delay from the LISA detector for a signal with + the given sky location in ICRS frame; i.e. return `t1 - t2` where + `t1` is the arrival time in this detector and `t2` is the arrival + time in the other detector. + Parameters + ---------- + other_detector : detector.Detector + A detector instance. + right_ascension : float + The right ascension (in rad) of the signal. + declination : float + The declination (in rad) of the signal. + t_gps : float + The GPS time (in s) of the signal. + Returns + ------- + numpy.ndarray + The arrival time difference between the detectors. 
+ """ loc = Detector(det, t_gps).get_icrs_pos() return self.time_delay_from_location(loc, right_ascension, declination, t_gps) def time_delay_from_earth_center(self, right_ascension, declination, t_gps): + """Return the time delay from the earth center in ICRS frame + """ t_gps = Time(val=t_gps, format='gps', scale='utc') earth = coordinates.get_body('earth', t_gps, location=None).transform_to('icrs') From 0fa703413cffcabd6eee868fb8ab35489dbf47f6 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Thu, 25 Jun 2020 08:48:10 +0530 Subject: [PATCH 28/68] Commit_2: 25/6 Changes for code climate issues --- pycbc/detector.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index 81c65657b6c..7fcb0a85cd3 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -331,18 +331,17 @@ def __init__(self): None def get_pos(self, ref_time): - """Return the position of LISA detector for a given reference time - Parameters - ---------- - ref_time : numpy.ScalarType - Returns - ------- - location : numpy.ndarray of shape (3,3) - Returns the position of all 3 sattelites with each row + """Return the position of LISA detector for a given reference time + Parameters + ---------- + ref_time : numpy.ScalarType + Returns + ------- + location : numpy.ndarray of shape (3,3) + Returns the position of all 3 sattelites with each row correspoding to a single axis. - """ + """ ref_time = 2034 - Time(val=ref_time, format='gps', scale='utc').jyear - n = np.array(range(1, 4)) kappa, _lambda_ = 0, 0 alpha = 2. * np.pi * ref_time/1 + kappa From 9b387b98da58b790d23f655b63ef74fa0c0c1f74 Mon Sep 17 00:00:00 2001 From: SSingh087 <66710757+SSingh087@users.noreply.github.com> Date: Thu, 25 Jun 2020 10:27:11 +0530 Subject: [PATCH 29/68] commit_3: 25/6 --- pycbc/detector.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pycbc/detector.py b/pycbc/detector.py index 7fcb0a85cd3..b21bce2e990 100644 --- a/pycbc/detector.py +++ b/pycbc/detector.py @@ -275,7 +275,7 @@ def get_icrs_pos(self): """ Transforms GCRS frame to ICRS frame Returns ---------- - loc: numpy.ndarray of shape (3,1) + loc: numpy.ndarray shape (3,1) units: AU ICRS coordinates in cartesian system """ loc = self.location @@ -357,14 +357,19 @@ def get_pos(self, ref_time): return self.location - def get_gcrs_pos(self, loc): + def get_gcrs_pos(self, location): """ Transforms ICRS frame to GCRS frame + Parameters + ---------- + loc : numpy.ndarray shape (3,1) units: AU + Cartesian Coordinates of the location + in ICRS frame Returns ---------- - loc: numpy.ndarray of shape (3,3) - GCRS coordinates in cartesian system + loc : numpy.ndarray shape (3,1) units: meters + GCRS coordinates in cartesian system """ - loc = self.location + loc = location loc = coordinates.SkyCoord(x=loc[0], y=loc[1], z=loc[2], unit=units.AU, frame='icrs', representation_type='cartesian').transform_to('gcrs') loc.representation_type = 'cartesian' From 2eab5a54365eb20e2498692760761a46027fda3d Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Thu, 25 Jun 2020 20:41:51 +0200 Subject: [PATCH 30/68] Add inference config files (#3338) * add standard prior for BBH with uniform comoving volume * add marginalized phase example config file * add standard data settings for o1 and o2 * add standard dynesty and emcee_pt configs * add config file and bash script to create dynesty workflow * add files for emcee pt --- examples/inference/data/o1.ini | 35 +++ 
examples/inference/data/o2.ini | 36 +++ .../inference/models/marginalized_phase.ini | 44 ++++ .../priors/bbh-uniform_comoving_volume.ini | 197 +++++++++++++++++ examples/inference/samplers/dynesty.ini | 12 + .../emcee_pt-srcmasses_comoving_volume.ini | 39 ++++ .../create_workflow.sh | 16 ++ .../gw150914_gw170814-dynesty/events.ini | 24 ++ .../workflow_config.ini | 191 ++++++++++++++++ .../create_workflow.sh | 16 ++ .../gw150914_gw170814-emcee_pt/events.ini | 23 ++ .../workflow_config.ini | 206 ++++++++++++++++++ 12 files changed, 839 insertions(+) create mode 100644 examples/inference/data/o1.ini create mode 100644 examples/inference/data/o2.ini create mode 100644 examples/inference/models/marginalized_phase.ini create mode 100644 examples/inference/priors/bbh-uniform_comoving_volume.ini create mode 100644 examples/inference/samplers/dynesty.ini create mode 100644 examples/inference/samplers/emcee_pt-srcmasses_comoving_volume.ini create mode 100755 examples/workflow/inference/gw150914_gw170814-dynesty/create_workflow.sh create mode 100644 examples/workflow/inference/gw150914_gw170814-dynesty/events.ini create mode 100644 examples/workflow/inference/gw150914_gw170814-dynesty/workflow_config.ini create mode 100755 examples/workflow/inference/gw150914_gw170814-emcee_pt/create_workflow.sh create mode 100644 examples/workflow/inference/gw150914_gw170814-emcee_pt/events.ini create mode 100644 examples/workflow/inference/gw150914_gw170814-emcee_pt/workflow_config.ini diff --git a/examples/inference/data/o1.ini b/examples/inference/data/o1.ini new file mode 100644 index 00000000000..cf0bd7ebf5a --- /dev/null +++ b/examples/inference/data/o1.ini @@ -0,0 +1,35 @@ +;============================================================================== +; +; Settings for analyzing O1 data +; +;============================================================================== +; +; This provides standard settings for analyzing H1, and L1 data in O1. +; It uses "OVERRIDE" for parameters that event-specific. Replace OVERRIDE +; either by editing this file, or using the config-override option in +; pycbc_inference. +[data] +instruments = H1 L1 +trigger-time = OVERRIDE +analysis-start-time = OVERRIDE +analysis-end-time = OVERRIDE +psd-estimation = median-mean +psd-start-time = -256 +psd-end-time = 256 +psd-inverse-length = 8 +psd-segment-length = 8 +psd-segment-stride = 4 +; If you are running on the Atlas cluster, an LDG cluster, or any computer +; with a ligo-data-server, you can use the frame-type argument to automatically +; locate the location of the frame files containing the data. If you are not +; running on one of those computers, download the necessary data from GWOSC +; (gw-openscience.org), remove the frame-type argument, and uncomment +; frame-files, pointing the latter to the files you downloaded. +;frame-files = H1:/PATH/TO/DOWNLOADED/H1FRAME.gwf L1:/PATH/TO/DOWNLOADED/L1FRAME.gwf +frame-type = H1:H1_LOSC_16_V1 L1:L1_LOSC_16_V1 +channel-name = H1:GWOSC-16KHZ_R1_STRAIN L1:GWOSC-16KHZ_R1_STRAIN +; A sample rate of 2048 is sufficient for BBH. If you are analyzing a BNS or +; NSBH, change to 4096. 
+sample-rate = 2048 +strain-high-pass = 15 +pad-data = 8 diff --git a/examples/inference/data/o2.ini b/examples/inference/data/o2.ini new file mode 100644 index 00000000000..ebe71cfb8b2 --- /dev/null +++ b/examples/inference/data/o2.ini @@ -0,0 +1,36 @@ +;============================================================================== +; +; Settings for analyzing O2 data +; +;============================================================================== +; +; This provides standard settings for analyzing H1, L1, and V1 data in O2. +; It uses "OVERRIDE" for parameters that event-specific. Replace OVERRIDE +; either by editing this file, or using the config-override option in +; pycbc_inference. +; +[data] +instruments = H1 L1 V1 +trigger-time = OVERRIDE +analysis-start-time = OVERRIDE +analysis-end-time = OVERRIDE +psd-estimation = median-mean +psd-start-time = -256 +psd-end-time = 256 +psd-inverse-length = 8 +psd-segment-length = 8 +psd-segment-stride = 4 +; If you are running on the Atlas cluster, an LDG cluster, or any computer +; with a ligo-data-server, you can use the frame-type argument to automatically +; locate the location of the frame files containing the data. If you are not +; running on one of those computers, download the necessary data from GWOSC +; (gw-openscience.org), remove the frame-type argument, and uncomment +; frame-files, pointing the latter to the files you downloaded. +frame-type = H1:H1_GWOSC_O2_16KHZ_R1 L1:L1_GWOSC_O2_16KHZ_R1 V1:V1_GWOSC_O2_16KHZ_R1 +;frame-files = H1:/PATH/TO/DOWNLOADED/H1FRAME.gwf L1:/PATH/TO/DOWNLOADED/L1FRAME.gwf V1:/PATH/TO/DOWNLOADED/V1FRAME.gwf +channel-name = H1:GWOSC-16KHZ_R1_STRAIN L1:GWOSC-16KHZ_R1_STRAIN V1:GWOSC-16KHZ_R1_STRAIN +; A sample rate of 2048 is sufficient for BBH. If you are analyzing a BNS or +; NSBH, change to 4096. +sample-rate = 2048 +strain-high-pass = 15 +pad-data = 8 diff --git a/examples/inference/models/marginalized_phase.ini b/examples/inference/models/marginalized_phase.ini new file mode 100644 index 00000000000..aca374167d3 --- /dev/null +++ b/examples/inference/models/marginalized_phase.ini @@ -0,0 +1,44 @@ +;============================================================================== +; +; Gaussian noise model with marginalized phase +; +;============================================================================== +; +; This provides settings for the marginalized Gaussian noise model. The +; low-frequency-cutoff of 20Hz is adequate for the O1-O3 LIGO and Virgo +; detectors. Change as appropriate for future detectors. +; +; The check-for-valid-times and shift-psd-times-to-valid options mean that +; the model will check to see if there are any data quality flags on during +; the requested analysis times (determined by the [data] section). If there +; are any flags on during the analysis times for a detector, that detector +; will automatically be removed from the analysis (an error will be raised if +; all detectors are removed). If you do not want this to happen, uncomment +; the err-on-missing-detectors option. In that case, an error will be raised +; if any detectors have a data quality flag on. The shift-psd-times-to-valid +; argument will cause the times used for estimating the PSD (determined by +; the psd-start|end-time arguments in the [data] section) to be shifted left +; or right to avoid any data quality flags. A shift up to +/- dT/2 will be +; tried, where dT is the difference between psd-end-time and psd-start-time. 
+; If no valid data can be found even with the maximum shift, the detector
+; will be removed from the analysis. To check for valid-times, the dq-* options
+; are used in the strain model. See
+; http://pycbc.org/pycbc/latest/html/inference.html#setting-data for details.
+;
+; The ignore-failed-waveforms option tells the model to treat points in
+; parameter space that cause the waveform generator to fail as having 0
+; likelihood. This may be necessary for newer precessing models, in which
+; the entire parameter space has not been fully tested. Note, however, that
+; in that case you will not be able to tell if some parameters have been ruled
+; out because the data disfavors them, or because the model failed. For this
+; reason it is best to let the code raise an error (i.e., leave the option
+; commented out), and only ignore these errors once you are confident you know
+; the reason.
+;
+[model]
+name = marginalized_phase
+low-frequency-cutoff = 20
+check-for-valid-times =
+shift-psd-times-to-valid =
+;err-on-missing-detectors =
+;ignore-failed-waveforms =
diff --git a/examples/inference/priors/bbh-uniform_comoving_volume.ini b/examples/inference/priors/bbh-uniform_comoving_volume.ini
new file mode 100644
index 00000000000..35d1fdc4682
--- /dev/null
+++ b/examples/inference/priors/bbh-uniform_comoving_volume.ini
@@ -0,0 +1,197 @@
+;==============================================================================
+;
+; Standard BBH Prior
+;
+;==============================================================================
+;
+; This configuration file provides a standard prior for binary black holes
+; (BBH). It uses a uniform prior on *source* masses, along with a uniform
+; prior in comoving volume. Waveform transforms are included to convert the
+; source masses into detector-frame masses using a standard cosmology
+; (Planck 2015). The minimum and maximum volumes used correspond to
+; luminosity distances of ~10Mpc and ~1.5Gpc, respectively. It can therefore
+; be used with BBH in O1-O2. To use for future detectors, simply change the
+; volume limits.
+;
+; The coa_phase is not varied, so this has to be used with a model that
+; marginalizes the phase automatically (e.g. the marginalized_phase or relbin
+; models). If you are not using a model that marginalizes the phase, uncomment
+; the coa_phase in the [variable_params], along with the [prior-coa_phase]
+; section.
+;
+; The mass range used is 10-80, and so is fine for GW150914-like BBH. For
+; lower-mass BBH, the prior range should be decreased. Keep in mind
+; that lowering the mass prior increases the duration of the longest waveform
+; admitted by the prior (meaning that you may need to change your
+; analysis-start-time in your data section if you do that).
+;
+; The starting frequency of the waveform approximant is set to 20Hz (the
+; f_lower and f_ref settings in the [static_params]). This is OK to use
+; for the O1-O3 LIGO and Virgo detectors. With this lower-frequency cutoff
+; and the lower-bound of the mass prior of 10, the longest waveform that may
+; be generated is ~6s. Suggested analysis-start and end-time settings are -6
+; and 2 (with respect to the trigger-time), respectively.
+;
+; You may wish to lower the lower frequency cutoff for future detectors,
+; in which the PSD has better lower-frequency performance.
+; Keep in mind that decreasing the lower-frequency cutoff will make the
+; waveforms have longer duration in the time domain, and so the analysis
+; start time will need to be adjusted.
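+;
+; As a rough, illustrative check (a sketch, not part of this configuration,
+; and assuming that pycbc.waveform.get_waveform_filter_length_in_time
+; supports the chosen approximant and these keyword arguments), the duration
+; of the longest waveform admitted by the prior can be estimated in Python:
+;
+;   >>> from pycbc.waveform import get_waveform_filter_length_in_time
+;   >>> get_waveform_filter_length_in_time(approximant='IMRPhenomPv2',
+;   ...                                    mass1=10., mass2=10., f_lower=20.)
+;
+; The result is an approximate duration in seconds, which can guide the
+; analysis-start-time and analysis-end-time settings mentioned above.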
+; +; No [data], [model], or [sampler] sections are provided here. This should be +; in used in tandem with additional configuration files that provide those +; sections. + +[variable_params] +delta_tc = +; Note that we call the masses srcmass[X]. This is because the waveform +; generator assumes that parameters called mass[X] are detector-frame masses. +; We therefore need to call the source masses something different; we choose +; "srcmass" here, but they could be called anything. In the waveform transforms +; sections below, we convert these to detector-frame masses. +srcmass1 = +srcmass2 = +spin1_a = +spin1_azimuthal = +spin1_polar = +spin2_a = +spin2_azimuthal = +spin2_polar = +comoving_volume = +inclination = +polarization = +ra = +dec = +; Uncomment this if you are not using a model that marginalizes over phase. +;coa_phase = + +[static_params] +approximant = IMRPhenomPv2 +f_lower = 20 +f_ref = 20 +; The trigger time is used with delta_tc to get the coalescence time tc. We'll +; get the trigger time from the data section (provided in a separate file). +trigger_time = ${data|trigger-time} + +;----------------------------------------------------------------------------- +; +; Intrinsic parameters +; +;----------------------------------------------------------------------------- + +[prior-srcmass1] +name = uniform +min-srcmass1 = 10 +max-srcmass1 = 80 + +[prior-srcmass2] +name = uniform +min-srcmass2 = 10 +max-srcmass2 = 80 + +[prior-spin1_a] +name = uniform +min-spin1_a = 0.0 +max-spin1_a = 0.99 + +[prior-spin1_polar+spin1_azimuthal] +name = uniform_solidangle +polar-angle = spin1_polar +azimuthal-angle = spin1_azimuthal + +[prior-spin2_a] +name = uniform +min-spin2_a = 0.0 +max-spin2_a = 0.99 + +[prior-spin2_polar+spin2_azimuthal] +name = uniform_solidangle +polar-angle = spin2_polar +azimuthal-angle = spin2_azimuthal + +; The waveform generator expects spins to be in cartesian coordinates, with +; names spin(1|2)(x|y|z). We therefore need to provide a waveform transform +; that converts the spherical coordinates that we have defined the spin prior +; in to cartesian coordinates. +[waveform_transforms-spin1x+spin1y+spin1z] +name = spherical_to_cartesian +x = spin1x +y = spin1y +z = spin1z +radial = spin1_a +polar = spin1_polar +azimuthal = spin1_azimuthal + +[waveform_transforms-spin2x+spin2y+spin2z] +name = spherical_to_cartesian +x = spin2x +y = spin2y +z = spin2z +radial = spin2_a +polar = spin2_polar +azimuthal = spin2_azimuthal + +;----------------------------------------------------------------------------- +; +; Extrinsic parameters +; +;----------------------------------------------------------------------------- + +[prior-delta_tc] +name = uniform +; We'll use +/-0.1s around the estimated coalescence (trigger) time. +min-delta_tc = -0.1 +max-delta_tc = 0.1 + +[waveform_transforms-tc] +; The waveform generator needs tc, which we calculate here. +name = custom +inputs = trigger_time, delta_tc +tc = trigger_time + delta_tc + +[prior-inclination] +name = sin_angle + +; Uncomment this section if you are not using a model that marginalizes over +; the phase. +;[prior-coa_phase] +;name = uniform_angle + +[prior-ra+dec] +name = uniform_sky + +[prior-polarization] +name = uniform_angle + +[prior-comoving_volume] +name = uniform +; These limits correspond to luminosity distances of ~[10, 1500) Mpc. Change +; if you are analyzing detections which are more than ~1Gpc away. 
+min-comoving_volume = 5e3
+max-comoving_volume = 9e9
+
+; The following [waveform_transforms] sections convert the comoving volume
+; to luminosity distance and the source masses to detector frame masses.
+; The latter is done by calculating redshift from the comoving volume first.
+; The order that transforms need to be applied is figured out automatically by
+; the code, so it doesn't matter what order we put them here, as long as we
+; provide transforms for all intermediate steps.
+[waveform_transforms-redshift]
+name = custom
+inputs = comoving_volume
+redshift = redshift_from_comoving_volume(comoving_volume)
+
+[waveform_transforms-distance]
+name = custom
+inputs = comoving_volume
+distance = distance_from_comoving_volume(comoving_volume)
+
+[waveform_transforms-mass1]
+name = custom
+inputs = srcmass1, redshift
+mass1 = srcmass1 * (1 + redshift)
+
+[waveform_transforms-mass2]
+name = custom
+inputs = srcmass2, redshift
+mass2 = srcmass2 * (1 + redshift)
diff --git a/examples/inference/samplers/dynesty.ini b/examples/inference/samplers/dynesty.ini
new file mode 100644
index 00000000000..54a99d34d13
--- /dev/null
+++ b/examples/inference/samplers/dynesty.ini
@@ -0,0 +1,12 @@
+;==============================================================================
+;
+; Dynesty settings
+;
+;==============================================================================
+;
+; The following provides standard settings for the dynesty sampler.
+;
+[sampler]
+name = dynesty
+dlogz = 0.1
+nlive = 2000
diff --git a/examples/inference/samplers/emcee_pt-srcmasses_comoving_volume.ini b/examples/inference/samplers/emcee_pt-srcmasses_comoving_volume.ini
new file mode 100644
index 00000000000..91c7106910a
--- /dev/null
+++ b/examples/inference/samplers/emcee_pt-srcmasses_comoving_volume.ini
@@ -0,0 +1,39 @@
+;==============================================================================
+;
+; Emcee PT settings for CBC, comoving volume
+;
+;==============================================================================
+;
+; The following provides standard settings for emcee_pt when analyzing a
+; compact binary merger. This assumes that the prior is specified in terms
+; of the source masses (srcmass1, srcmass2) and comoving volume
+; (comoving_volume). To speed up convergence, the source masses are sampled in
+; chirp mass and mass ratio, and the comoving volume is sampled in the log.
+;
+; We set the number of effective samples to 1500 because we've found that
+; emcee_pt struggles to acquire more than ~8 independent samples per walker.
+;
+[sampler]
+name = emcee_pt
+nwalkers = 200
+ntemps = 20
+effective-nsamples = 1500
+checkpoint-interval = 2000
+max-samples-per-chain = 1000
+
+[sampler-burn_in]
+burn-in-test = nacl & max_posterior
+
+[sampling_params]
+srcmass1, srcmass2 = mchirp, q
+comoving_volume = logv
+
+[sampling_transforms-mchirp+q]
+name = mass1_mass2_to_mchirp_q
+mass1_param = srcmass1
+mass2_param = srcmass2
+
+[sampling_transforms-logv]
+name = log
+inputvar = comoving_volume
+outputvar = logv
diff --git a/examples/workflow/inference/gw150914_gw170814-dynesty/create_workflow.sh b/examples/workflow/inference/gw150914_gw170814-dynesty/create_workflow.sh
new file mode 100755
index 00000000000..d6a3353f559
--- /dev/null
+++ b/examples/workflow/inference/gw150914_gw170814-dynesty/create_workflow.sh
@@ -0,0 +1,16 @@
+set -e
+
+WORKFLOW_NAME=inference-dynesty-gw150914_gw170814
+# Set the HTML_DIR to point to your public html page. This is where the results
+# page will be written.
+HTML_DIR='' +if [ "${HTML_DIR}" == '' ]; then + echo "Please set an HTML_DIR" + exit 1 +fi +SEED=8827 +pycbc_make_inference_workflow \ + --seed ${SEED} \ + --config-files workflow_config.ini events.ini \ + --workflow-name ${WORKFLOW_NAME} \ + --config-overrides results_page:output-path:${HTML_DIR}/${WORKFLOW_NAME} diff --git a/examples/workflow/inference/gw150914_gw170814-dynesty/events.ini b/examples/workflow/inference/gw150914_gw170814-dynesty/events.ini new file mode 100644 index 00000000000..f5c47020f92 --- /dev/null +++ b/examples/workflow/inference/gw150914_gw170814-dynesty/events.ini @@ -0,0 +1,24 @@ +[event-gw150914] +label = GW150914+09:50:45UTC +config-files = bbh-uniform_comoving_volume.ini + marginalized_phase.ini + dynesty.ini + o1.ini +config-overrides = data:trigger-time:1126259462.413 + data:analysis-start-time:-8 + data:analysis-end-time:2 +; We can run multiple instances of inference to accumulate more samples by +; setting nruns. This is useful for emcee_pt. However, dynesty generally +; produces enough samples in a single run, so we'll leave this commented out. +; (The default is to do 1 run.) +;nruns = 2 + +[event-gw170814] +label = GW170814+10:30:43UTC +config-files = bbh-uniform_comoving_volume.ini + marginalized_phase.ini + dynesty.ini + o2.ini +config-overrides = data:trigger-time:1186741861.533 + data:analysis-start-time:-8 + data:analysis-end-time:2 diff --git a/examples/workflow/inference/gw150914_gw170814-dynesty/workflow_config.ini b/examples/workflow/inference/gw150914_gw170814-dynesty/workflow_config.ini new file mode 100644 index 00000000000..64899b1d197 --- /dev/null +++ b/examples/workflow/inference/gw150914_gw170814-dynesty/workflow_config.ini @@ -0,0 +1,191 @@ +[workflow] +; basic information used by the workflow generator +file-retention-level = all_triggers +; The start/end times here are just used for file naming. They can be set +; to anything -- they aren't used for anything, and have no effect on the +; analysis. The actual analysis times used are set by the [data] section in +; the configuration files given to pycbc_inference (specified in the events +; config file). +start-time = 1126259200 +end-time = 1126259600 + +[workflow-ifos] +; The ifos listed here are just used for file naming, it doesn't matter if +; they are not consistent with the actual detectors analyzed. +h1 = +l1 = +v1 = + +[extract_posterior] +; Here, we'll ensure that the output parameters are such that mass1 >= mass2 +; (and associated spins), change comoving volume into redshift and distance, +; add mchirp, q, chi_eff, and chi_p to the posterior files. 
+parameters = 'primary_mass(srcmass1, srcmass2):srcmass1' + 'secondary_mass(srcmass1, srcmass2):srcmass2' + 'primary_spin(srcmass1, srcmass2, spin1_a, spin2_a):spin1_a' + 'primary_spin(srcmass1, srcmass2, spin1_azimuthal, spin2_azimuthal):spin1_azimuthal' + 'primary_spin(srcmass1, srcmass2, spin1_polar, spin2_polar):spin1_polar' + 'secondary_spin(srcmass1, srcmass2, spin1_a, spin2_a):spin2_a' + 'secondary_spin(srcmass1, srcmass2, spin1_azimuthal, spin2_azimuthal):spin2_azimuthal' + 'secondary_spin(srcmass1, srcmass2, spin1_polar, spin2_polar):spin2_polar' + 'mchirp_from_mass1_mass2(srcmass1, srcmass2):srcmchirp' + 'chi_eff_from_spherical(srcmass1, srcmass2, spin1_a, spin1_polar, spin2_a, spin2_polar):chi_eff' + 'chi_p_from_spherical(srcmass1, srcmass2, spin1_a, spin1_azimuthal, spin1_polar, spin2_a, spin2_azimuthal, spin2_polar):chi_p' + 'redshift_from_comoving_volume(comoving_volume):redshift' + 'distance_from_comoving_volume(comoving_volume):distance' + '*' +force = + +[workflow-summary_table] +; Parameters that will be printed in the summary table. +; These must be from the set specified in extract_posterior. +table-params = srcmass1 srcmass2 + srcmchirp 'q_from_mass1_mass2(srcmass1, srcmass2):q' + chi_eff chi_p + ra dec delta_tc + distance redshift + 'snr_from_loglr(loglikelihood-lognl):SNR' +; The additional metadata will be printed below the table. We can print +; anything that is in the posterior files' attrs. +print-metadata = 'trigger_time:$t_0$' 'analyzed_detectors:Detectors' + +[workflow-summary_plots] +; Parameter posteriors that will plotted on the summary page. +; These must be from the set specified in extract_posterior. +; Each plot-group corresponds to a single plot that will be plot on the +; summary page. Generally, these should be limited to 1 or 2 dimensions +; (although this is not enforced); larger corner plots can be put in the +; Posteriors page. The plots for those are set by the [workflow-plot_params] +; section (see below). +; The settings for the posterior plots created here are read from the +; [plot_posterior_summary] section. +plot-group-mass1_mass2 = srcmass1 srcmass2 +plot-group-inc_distance = inclination distance +plot-group-chip_chieff = chi_p chi_eff +; Notice that we are not including ra and dec here. The sky map is +; created by [plot_skymap]. + +[workflow-plot_params] +; Parameter posteriors that will plotted on the "Posteriors" page. +; These must be from the set specified in extract_posterior. +; Each plot-group corresponds to a single plot that will be plot on the +; page. Since the events are split into their own sub-pages, it's ok to make +; large corner plots here (although too large and it will be hard to make +; out what each parameter is doing). +; The settings for the posterior plots created here are read from the +; [plot_posterior] section. +; Since we plotted source-frame masses on the summary page, here we'll +; plot detector-frame masses. 
+plot-group-masses = 'srcmass1/(1+redshift):mass1' + 'srcmass2/(1+redshift):mass2' + 'srcmchirp/(1+redshift):mchirp' + 'q_from_mass1_mass2(srcmass1, srcmass2):q' +plot-group-spins = spin1_a spin2_a + spin1_azimuthal spin2_azimuthal + spin1_polar spin2_polar + chi_eff chi_p +plot-group-extrinsic = ra dec delta_tc polarization inclination distance redshift + +[executables] +; paths to executables to use in workflow +inference = ${which:run_pycbc_inference} +extract_posterior = ${which:pycbc_inference_extract_samples} +plot_posterior = ${which:pycbc_inference_plot_posterior} +plot_posterior_summary = ${which:pycbc_inference_plot_posterior} +plot_prior = ${which:pycbc_inference_plot_prior} +table_summary = ${which:pycbc_inference_table_summary} +create_fits_file = ${which:pycbc_inference_create_fits} +plot_skymap = ${which:pycbc_inference_plot_skymap} +plot_spectrum = ${which:pycbc_plot_psd_file} +results_page = ${which:pycbc_make_html_page} +; diagnostic plots: at the moment, there are none for Dynesty + +[pegasus_profile] +; +MaxRunTimeHours is needed for running on the ATLAS cluster; comment out +; if your cluster does not need this. +condor|+MaxRunTimeHours = 1 + +[pegasus_profile-inference] +condor|request_memory = 40G +; +MaxRunTimeHours is needed for running on the ATLAS cluster; comment out +; if your cluster does not need this. +condor|+MaxRunTimeHours = 10 +condor|request_cpus = ${inference|nprocesses} + +[pegasus_profile-plot_prior] +condor|request_memory = 4G + +[pegasus_profile-plot_skymap] +condor|request_memory = 4G + +[pegasus_profile-plot_posterior] +condor|request_memory = 4G + +[pegasus_profile-plot_posterior_summary] +condor|request_memory = 4G + +[pegasus_profile-plot_samples] +condor|request_memory = 4G + +[inference] +; Command line options for pycbc_inference. +verbose = +; Set the nprocesses to the number of cores you want each job to use. The +; value you use is cluster dependent. +nprocesses = 64 + +[plot_posterior_summary] +; These are the command line options that will be passed to +; pycbc_inference_plot_posterior for creating the posterior plots on the +; summary page. These settings will cause density plots to be made. +plot-contours = +plot-marginal = +plot-density = +density-cmap = Blues +contour-color = black + +[plot_posterior] +; These are the command line options that will be passed to +; pycbc_inference_plot_posterior for creating the posterior plots on the +; posteriors page. These settings will cause scatter plots to be made showing +; each point in the posterior, colored by the matched-filter SNR. +plot-contours = +plot-marginal = +plot-scatter = +z-arg = snr + +[create_fits_file] +; These are the settings for creating a fits file, which is used to produce +; the skymaps. This program needs ligo.skymap to be installed. +; The maxpts option limits the number of points in the posterior that are used +; to create the skymap. This is mostly for speeding up run time. Comment out +; to use all points. +maxpts = 1000 +; Since the posterior file stores delta_tc, we need to tell the fits +; file how to calculate tc +tc = 'trigger_time+delta_tc' + +[plot_skymap] +; These are settings for creating the skymap. This program requires +; ligo.skymap to be installed. Here, we're just setting the colormap to be +; the same as the posterior density plots, above. +colormap = ${plot_posterior_summary|density-cmap} + +[plot_prior] +; This sets command-line options to use for the plot prior function. These +; plots are on the "priors" page. 
The default (giving no options) is to +; plot all of the variable params. + +[table_summary] +; This sets command-line options for the table on the summary page. You +; should not need to set anything here. + +[plot_spectrum] +; This sets command-line options for the ASD plots on the detector sensitivity +; page. The dyn-range-factor needs to be set to 1. +dyn-range-factor = 1 + +[results_page] +; This sets settings for creating the results page. You may want to change +; the analysis title, to make it more descriptive. +analysis-title = "Inference results with dynesty" diff --git a/examples/workflow/inference/gw150914_gw170814-emcee_pt/create_workflow.sh b/examples/workflow/inference/gw150914_gw170814-emcee_pt/create_workflow.sh new file mode 100755 index 00000000000..3bbf79974bc --- /dev/null +++ b/examples/workflow/inference/gw150914_gw170814-emcee_pt/create_workflow.sh @@ -0,0 +1,16 @@ +set -e + +WORKFLOW_NAME=inference-gw150914_gw170814 +# Set the HTML_DIR to point to your public html page. This is where the results +# page will be written. +HTML_DIR='' +if [ "${HTML_DIR}" == '' ]; then + echo "Please set an HTML_DIR" + exit 1 +fi +SEED=978241 +pycbc_make_inference_workflow \ + --seed ${SEED} \ + --config-files workflow_config.ini events.ini \ + --workflow-name ${WORKFLOW_NAME} \ + --config-overrides results_page:output-path:${HTML_DIR}/${WORKFLOW_NAME} diff --git a/examples/workflow/inference/gw150914_gw170814-emcee_pt/events.ini b/examples/workflow/inference/gw150914_gw170814-emcee_pt/events.ini new file mode 100644 index 00000000000..d0febdaf045 --- /dev/null +++ b/examples/workflow/inference/gw150914_gw170814-emcee_pt/events.ini @@ -0,0 +1,23 @@ +[event-gw150914] +label = GW150914+09:50:45UTC +config-files = bbh-uniform_comoving_volume.ini + marginalized_phase.ini + emcee_pt-srcmasses_comoving_volume.ini + o1.ini +config-overrides = data:trigger-time:1126259462.413 + data:analysis-start-time:-6 + data:analysis-end-time:2 +; We'll run inference twice to double the number of independent samples +nruns = 2 + +[event-gw170814] +label = GW170814+10:30:43UTC +config-files = bbh-uniform_comoving_volume.ini + marginalized_phase.ini + emcee_pt-srcmasses_comoving_volume.ini + o2.ini +config-overrides = data:trigger-time:1186741861.533 + data:analysis-start-time:-6 + data:analysis-end-time:2 +; We'll run inference twice to double the number of independent samples +nruns = 2 diff --git a/examples/workflow/inference/gw150914_gw170814-emcee_pt/workflow_config.ini b/examples/workflow/inference/gw150914_gw170814-emcee_pt/workflow_config.ini new file mode 100644 index 00000000000..6a21595451a --- /dev/null +++ b/examples/workflow/inference/gw150914_gw170814-emcee_pt/workflow_config.ini @@ -0,0 +1,206 @@ +[workflow] +; basic information used by the workflow generator +file-retention-level = all_triggers +; The start/end times here are just used for file naming. They can be set +; to anything -- they aren't used for anything, and have no effect on the +; analysis. The actual analysis times used are set by the [data] section in +; the configuration files given to pycbc_inference (specified in the events +; config file). +start-time = 1126259200 +end-time = 1126259600 + +[workflow-ifos] +; The ifos listed here are just used for file naming, it doesn't matter if +; they are not consistent with the actual detectors analyzed. 
+h1 = +l1 = +v1 = + +[extract_posterior] +; Here, we'll ensure that the output parameters are such that mass1 >= mass2 +; (and associated spins), change comoving volume into redshift and distance, +; add mchirp, q, chi_eff, and chi_p to the posterior files. +parameters = 'primary_mass(srcmass1, srcmass2):srcmass1' + 'secondary_mass(srcmass1, srcmass2):srcmass2' + 'primary_spin(srcmass1, srcmass2, spin1_a, spin2_a):spin1_a' + 'primary_spin(srcmass1, srcmass2, spin1_azimuthal, spin2_azimuthal):spin1_azimuthal' + 'primary_spin(srcmass1, srcmass2, spin1_polar, spin2_polar):spin1_polar' + 'secondary_spin(srcmass1, srcmass2, spin1_a, spin2_a):spin2_a' + 'secondary_spin(srcmass1, srcmass2, spin1_azimuthal, spin2_azimuthal):spin2_azimuthal' + 'secondary_spin(srcmass1, srcmass2, spin1_polar, spin2_polar):spin2_polar' + 'mchirp_from_mass1_mass2(srcmass1, srcmass2):srcmchirp' + 'chi_eff_from_spherical(srcmass1, srcmass2, spin1_a, spin1_polar, spin2_a, spin2_polar):chi_eff' + 'chi_p_from_spherical(srcmass1, srcmass2, spin1_a, spin1_azimuthal, spin1_polar, spin2_a, spin2_azimuthal, spin2_polar):chi_p' + 'redshift_from_comoving_volume(comoving_volume):redshift' + 'distance_from_comoving_volume(comoving_volume):distance' + '*' +force = + +[workflow-summary_table] +; Parameters that will be printed in the summary table. +; These must be from the set specified in extract_posterior. +table-params = srcmass1 srcmass2 + srcmchirp 'q_from_mass1_mass2(srcmass1, srcmass2):q' + chi_eff chi_p + ra dec delta_tc + distance redshift + 'snr_from_loglr(loglikelihood-lognl):SNR' +; The additional metadata will be printed below the table. We can print +; anything that is in the posterior files' attrs. +print-metadata = 'trigger_time:$t_0$' 'analyzed_detectors:Detectors' + +[workflow-summary_plots] +; Parameter posteriors that will plotted on the summary page. +; These must be from the set specified in extract_posterior. +; Each plot-group corresponds to a single plot that will be plot on the +; summary page. Generally, these should be limited to 1 or 2 dimensions +; (although this is not enforced); larger corner plots can be put in the +; Posteriors page. The plots for those are set by the [workflow-plot_params] +; section (see below). +; The settings for the posterior plots created here are read from the +; [plot_posterior_summary] section. +plot-group-mass1_mass2 = srcmass1 srcmass2 +plot-group-inc_distance = inclination distance +plot-group-chip_chieff = chi_p chi_eff +; Notice that we are not including ra and dec here. The sky map is +; created by [plot_skymap]. + +[workflow-plot_params] +; Parameter posteriors that will plotted on the "Posteriors" page. +; These must be from the set specified in extract_posterior. +; Each plot-group corresponds to a single plot that will be plot on the +; page. Since the events are split into their own sub-pages, it's ok to make +; large corner plots here (although too large and it will be hard to make +; out what each parameter is doing). +; The settings for the posterior plots created here are read from the +; [plot_posterior] section. +; Since we plotted source-frame masses on the summary page, here we'll +; plot detector-frame masses. 
+plot-group-masses = 'srcmass1/(1+redshift):mass1' + 'srcmass2/(1+redshift):mass2' + 'srcmchirp/(1+redshift):mchirp' + 'q_from_mass1_mass2(srcmass1, srcmass2):q' +plot-group-spins = spin1_a spin2_a + spin1_azimuthal spin2_azimuthal + spin1_polar spin2_polar + chi_eff chi_p +plot-group-extrinsic = ra dec delta_tc polarization inclination distance redshift + +[executables] +; paths to executables to use in workflow +inference = ${which:run_pycbc_inference} +extract_posterior = ${which:pycbc_inference_extract_samples} +plot_posterior = ${which:pycbc_inference_plot_posterior} +plot_posterior_summary = ${which:pycbc_inference_plot_posterior} +plot_prior = ${which:pycbc_inference_plot_prior} +table_summary = ${which:pycbc_inference_table_summary} +create_fits_file = ${which:pycbc_inference_create_fits} +plot_skymap = ${which:pycbc_inference_plot_skymap} +plot_spectrum = ${which:pycbc_plot_psd_file} +results_page = ${which:pycbc_make_html_page} +; diagnostic plots +plot_acceptance_rate = ${which:pycbc_inference_plot_acceptance_rate} +plot_samples = ${which:pycbc_inference_plot_samples} + +[pegasus_profile] +; +MaxRunTimeHours is needed for running on the ATLAS cluster; comment out +; if your cluster does not need this. +condor|+MaxRunTimeHours = 1 + +[pegasus_profile-inference] +condor|request_memory = 40G +; +MaxRunTimeHours is needed for running on the ATLAS cluster; comment out +; if your cluster does not need this. +condor|+MaxRunTimeHours = 10 +condor|request_cpus = ${inference|nprocesses} + +[pegasus_profile-plot_prior] +condor|request_memory = 4G + +[pegasus_profile-plot_skymap] +condor|request_memory = 4G + +[pegasus_profile-plot_posterior] +condor|request_memory = 4G + +[pegasus_profile-plot_posterior_summary] +condor|request_memory = 4G + +[pegasus_profile-plot_samples] +condor|request_memory = 4G + +[inference] +; Command line options for pycbc_inference. +verbose = +; Set the nprocesses to the number of cores you want each job to use. The +; value you use is cluster dependent. +nprocesses = 32 + +[plot_posterior_summary] +; These are the command line options that will be passed to +; pycbc_inference_plot_posterior for creating the posterior plots on the +; summary page. These settings will cause density plots to be made. +plot-contours = +plot-marginal = +plot-density = +density-cmap = Blues +contour-color = black + +[plot_posterior] +; These are the command line options that will be passed to +; pycbc_inference_plot_posterior for creating the posterior plots on the +; posteriors page. These settings will cause scatter plots to be made showing +; each point in the posterior, colored by the matched-filter SNR. +plot-contours = +plot-marginal = +plot-scatter = +z-arg = snr + +[create_fits_file] +; These are the settings for creating a fits file, which is used to produce +; the skymaps. This program needs ligo.skymap to be installed. +; The maxpts option limits the number of points in the posterior that are used +; to create the skymap. This is mostly for speeding up run time. Comment out +; to use all points. +maxpts = 1000 +; Since the posterior file stores delta_tc, we need to tell the fits +; file how to calculate tc +tc = 'trigger_time+delta_tc' + +[plot_skymap] +; These are settings for creating the skymap. This program requires +; ligo.skymap to be installed. Here, we're just setting the colormap to be +; the same as the posterior density plots, above. 
+colormap = ${plot_posterior_summary|density-cmap} + +[plot_prior] +; This sets command-line options to use for the plot prior function. These +; plots are on the "priors" page. The default (giving no options) is to +; plot all of the variable params. + +[table_summary] +; This sets command-line options for the table on the summary page. You +; should not need to set anything here. + +[plot_spectrum] +; This sets command-line options for the ASD plots on the detector sensitivity +; page. The dyn-range-factor needs to be set to 1. +dyn-range-factor = 1 + +[plot_acceptance_rate] +; This sets command-line options for the acceptance rate diagnostic plots. +; This should only be used for MCMC samplers. You do not need to set anything +; here for this plot. + +[plot_samples] +; This sets command-line options for the plot of samples chains. +; This should only be used for MCMC samplers. Here, we are telling it to plot +; all chains, and to show every single iteration. +chains = all +thin-start = 0 +thin-interval = 1 + +[results_page] +; This sets settings for creating the results page. You may want to change +; the analysis title, to make it more descriptive. +analysis-title = "Inference results" From 29335c53549d146e7c295b894972e47f42f465c4 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Thu, 25 Jun 2020 22:26:52 +0200 Subject: [PATCH 31/68] Update inference workflow docs (#3339) * update the inference workflow docs * fix some typos --- .../pycbc_make_inference_workflow.rst | 275 +++++++++++++++--- 1 file changed, 237 insertions(+), 38 deletions(-) diff --git a/docs/workflow/pycbc_make_inference_workflow.rst b/docs/workflow/pycbc_make_inference_workflow.rst index 1d6ee105343..8f121d16dca 100644 --- a/docs/workflow/pycbc_make_inference_workflow.rst +++ b/docs/workflow/pycbc_make_inference_workflow.rst @@ -6,75 +6,274 @@ Introduction =============== -The executable ``pycbc_make_inference_workflow`` is a workflow generator to setup a parameter estimation analysis. +The executable ``pycbc_make_inference_workflow`` is a workflow generator to +setup a parameter estimation analysis. It can be setup to run on one or more +events at once. For each event, the workflow: -=========================== -Workflow configuration file -=========================== + #. Runs ``pycbc_inference``. If desired, you can run multiple independent + instances of ``pycbc_inference`` on the same event. + #. Extracts a posterior file using ``pycbc_inference_extract_samples``. If + multiple instances of ``pycbc_inference`` were run on the same event, the + samples from all of the runs will be combined into a single posterior file. + You can also have derived parameters written out to the posterior file. + #. Makes various posterior plots and tables. The prior is also plotted. If + you are analyzing gravitational-wave data, a plot of power spectral density + (PSD) used for each event is also created. + #. If you are working in a Python 3.x environment you can optionally have + the workflow produce a skymap for each event (this requires ``ligo.skymap`` + to be installed). + #. Optionally creates sampler-dependent diagnostic plots. + #. Generates a results html page that gathers all of the results. -A sample workflow configuration file: +The workflow generator requires a configuration file that tells it what plots +to make, what parameters to produce posteriors for, which events to analyze, +and any other settings to use for the various executables that are run. -.. 
literalinclude:: ../../examples/workflow/inference/workflow_config.ini - :language: ini +For each event, one or more inference configuration files (the file(s) passed +to ``pycbc_inference``) must also be provided. These are separate from the +workflow configuration file, as they describe how to analyze each event. You +tell the workflow how many events to analyze and which inference configuration +files to use for each event via ``[event-{label}]`` sections in the workflow +configuration file. Here, ``{label}`` is a unique label for each event. -:download:`Download <../../examples/workflow/inference/workflow_config.ini>` +To illustrate how to setup and use a workflow, below we provide an example +of how to setup the workflow to analyze two binary black hole events at once +-- GW150914 and GW170814. -============================ -Inference configuration file -============================ -You will also need a configuration file with sections that tells ``pycbc_inference`` how to construct the priors. A sample inference configuration file is: +================================================ +Example: GW150914 and GW170814 with ``emcee_pt`` +================================================ -.. literalinclude:: ../../examples/workflow/inference/inference.ini - :language: ini +In this example we setup a workflow to analyze GW150914 and GW170814 using +``emcee_pt``. We will use a prior that is uniform in comoving volume and +uniform in source masses. As we will be using the ``IMRPhenomPv2`` waveform +approximant, we will use the ``marginalized_phase`` Gaussian noise model. -:download:`Download <../../examples/workflow/inference/inference.ini>` +This workflow will produce a results page that looks like the example +`here `_. -A sample configuration file for parameter estimation on the ringdown is: +The inference configuration files we will use can all be found in the pycbc +``examples`` directory. Below, we provide instructions on what files need +to be downloaded, and how to setup and run the workflow. -.. literalinclude:: ../../examples/workflow/inference/ringdown_inference.ini - :language: ini -:download:`Download <../../examples/workflow/inference/ringdown_inference.ini>` +------------------------------------- +Get the inference configuration files +------------------------------------- -If you want to use another variable parameter in the inference sampler then add its name to ``[variable_args]`` and add a prior section like shown above. +We need the configuration files for ``pycbc_inference``. These define the +prior, model, sampler, and data to use for each event. -===================== -Generate the workflow -===================== +**The prior:** -To generate a workflow you will need your configuration files. If you want to run on the loudest triggers from a PyCBC coincident search workflow then run: +.. literalinclude:: ../../examples/inference/priors/bbh-uniform_comoving_volume.ini + :language: ini -.. literalinclude:: ../../examples/workflow/inference/run_pycbc_make_inference_workflow.sh - :language: bash +:download:`Download <../../examples/inference/priors/bbh-uniform_comoving_volume.ini>` + +**The model:** + +.. literalinclude:: ../../examples/inference/models/marginalized_phase.ini + :language: ini + +:download:`Download <../../examples/inference/models/marginalized_phase.ini>` + +**The sampler:** + +.. 
literalinclude:: ../../examples/inference/samplers/emcee_pt-srcmasses_comoving_volume.ini + :language: ini + +:download:`Download <../../examples/inference/samplers/emcee_pt-srcmasses_comoving_volume.ini>` + +**The data:** We also need configuration files for the data. Since GW150914 +occured during O1 while GW170814 occurred during O2, we need both the standard +O1 and O2 files: + +.. literalinclude:: ../../examples/inference/data/o1.ini + :language: ini + +:download:`Download <../../examples/inference/data/o1.ini>` + +.. literalinclude:: ../../examples/inference/data/o2.ini + :language: ini + +:download:`Download <../../examples/inference/data/o2.ini>` + + +------------------------------------- +Setup the workflow configuration file +------------------------------------- + +As discussed above, the workflow configuration file specifes what events to +analyze, what programs to run, and what settings to use for those programs. +Since the same general workflow settings can be used for different classes of +events, here we have split the workflow configuration file into two separate +files, ``events.ini`` and ``workflow_config.ini``. The former specifies what +events we are analyzing in this run, while the latter specifies all of the +other settings. As we will see below, we can simply provide these two files to +``pycbc_make_inference_workflow``'s ``--config-file`` argument; it will +automatically combine them into a single file. + +The events: + +.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-emcee_pt/events.ini + :language: ini + +:download:`Download <../../examples/workflow/inference/gw150914_gw170814-emcee_pt/events.ini>` + +The rest of the configuration file: + +.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-emcee_pt/workflow_config.ini + :language: ini + +:download:`Download <../../examples/workflow/inference/gw150914_gw170814-emcee_pt/workflow_config.ini>` + +**Notes**: + + * Since the ``[executables]`` section contains entries for + ``create_fits_file`` and ``plot_skymap``, the workflow will try to create + sky maps. **This requires a Python 3.x environment and** ``ligo.skymap`` + **to be installed.** If you have not installed ``ligo.skymap`` yet, do so by + running:: + + pip install ligo.skymap -:download:`Download <../../examples/workflow/inference/run_pycbc_make_inference_workflow.sh>` + * If you do not want to create sky maps, or are running a Python 2.7 + environment, you can turn this off by simply commenting out or removing + ``create_fits_file`` and ``plot_skymap`` from the ``[executables]`` section. -Where ``${BANK_FILE}`` is the path to the template bank HDF file, ``${STATMAP_FILE}`` is the path to the combined statmap HDF file, ``${SNGL_H1_PATHS}`` and ``${SNGL_L1_PATHS}`` are the paths to the merged single-detector HDF files, and ``${WORKFLOW_START_TIME}`` and ``${WORKFLOW_END_TIME}`` are the start and end time of the coincidence workflow. + * The number of cores that will be used by ``pycbc_inference`` is set by the + ``nprocesses`` argument in the ``[inference]`` section. You should set this + to the number of cores you expect to be able to get on your cluster. In the + configuration presented here, we are limited to shared memory cores. (It + is possible to run using MPI in order to parallelize over a larger number + of cores, but that requires special condor settings that must be implemented + by your cluster admins. That is outside the scope of these instructions.) 
-Else you can run from a specific GPS end time with the ``--gps-end-time`` option like: + * Notice that the number of processes that ``pycbc_inference`` will use is + referenced by the ``condor|request_cpus`` argument in the + ``[pegasus_profile-inference]`` section. This argurment is what tells + condor how many cores to assign to the job, and so sets the actual number + of resources ``pycbc_inference`` will get. Generally, you want this to + be the same as what is fed to ``pycbc_inference``'s ``nprocesses`` + option. -.. literalinclude:: ../../examples/workflow/inference/run_pycbc_make_inference_workflow_2.sh +The ``workflow_config.ini`` file can be used with any of the MCMC samplers when +analyzing a gravitational wave that involves the parameters mentioned in the +file. If you wanted to analyze other binary black holes, you could use this +same file, simply changing the ``events.ini`` file to point to the events +you want to analyze. + + +--------------------- +Generate the workflow +--------------------- + +Assuming that you have downloaded all of the configuration files to the +same directory, you can generate the workflow by running the following script: + +.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-emcee_pt/create_workflow.sh :language: bash -:download:`Download <../../examples/workflow/inference/run_pycbc_make_inference_workflow_2.sh>` +:download:`Download <../../examples/workflow/inference/gw150914_gw170814-emcee_pt/create_workflow.sh>` + +Note that you need to set the ``HTML_DIR`` before running. This tells the +workflow where to save the results page when done. You can also change +``WORKFLOW_NAME`` if you like. -Where ``${GPS_END_TIME}`` is the GPS end time of the trigger. +You should also change the ``SEED`` everytime you create a different workflow. +This sets the seed that is passed to ``pycbc_inference`` (you set it here +because it will be incremented for every ``pycbc_inference`` job that will be +run in the workflow). -For the CBC example above define the environment variables ``GPS_END_TIME=1126259462`` and ``OUTPUT_MAP_PATH=output.map``. +After the workflow has finished it will have created a directory named +``${WORKFLOW_NAME}-output``. This contains the ``dax`` and all necessary files +to run the workflow. -============================= +----------------------------- Plan and execute the workflow -============================= +----------------------------- -If you are on LDG, you need to define an accounting group. Plan and submit the workflow with:: +Change directory into the ``${WORKFLOW_NAME}-output`` directory:: + + cd ${WORKFLOW_NAME}-output + +If you are on the ATLAS cluster (at AEI Hannover) or on an LDG cluster, you +need to define an accounting group tag (talk to your cluster admins if you do +not know what this is). Once you know what accounting-group tag to use, plan +and submit the workflow with:: # submit workflow - cd ${OUTPUT_DIR} pycbc_submit_dax --dax ${WORKFLOW_NAME}.dax \ --no-grid \ --enable-shared-filesystem \ --accounting-group ${ACCOUNTING_GROUP} -Where ``${ACCOUNTING_GROUP}`` is the appropriate tag for your workflow. +Here, ``${ACCOUNTING_GROUP}`` is the appropriate tag for your workflow. + +Once it is running, you can monitor the status of the workflow by running +``./status`` from within the ``${WORKFLOW_NAME}-output`` directory. If your +workflow fails for any reason, you can see what caused the failure by running +``./debug``. If you need to stop the workflow at any point, run ``./stop``. 
+To resume a workflow, run ``./start``. If the ``pycbc_inference`` jobs were +still running, and they had checkpointed, they will resume from their last +checkpoint upon restart. + +------------ +Results page +------------ + +When the workflow has completed successfully it will write out the results +page to the directory you specified in the ``create_workflow.sh`` script. +You can see what the result page will look like `here `_. + + +============================================= +Example: GW150914 and GW170814 with `dynesty` +============================================= + +In this example, we repeat the above analysis, but using the `dynesty` +sampler. We can use the same +:download:`prior <../../examples/inference/priors/bbh-uniform_comoving_volume.ini>`, +:download:`model <../../examples/inference/models/marginalized_phase.ini>`, +and :download:`o1 <../../examples/inference/data/o1.ini>` and +:download:`o2 <../../examples/inference/data/o2.ini>` inference configuration +files as above. New files that we need are: + + * The sampler configuration file for ``dynesty``: + +.. literalinclude:: ../../examples/inference/samplers/dynesty.ini + :language: ini + +:download:`Download <../../examples/inference/samplers/dynesty.ini>` + + * An ``events`` file which uses ``dynesty``: + +.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-dynesty/events.ini + :language: ini + +:download:`Download <../../examples/workflow/inference/gw150914_gw170814-dynesty/events.ini>` + +Note that here, we are not running ``pycbc_inference`` multiple times. This is +because a single run of ``dynesty`` with the settings we are using (2000 live +points) produces a large number of (O(10 000)) samples. + +We also need a slightly different +:download:`workflow configuration file <../../examples/workflow/inference/gw150914_gw170814-dynesty/workflow_config.ini>`. The only difference from the workflow configuration file from the one above +is that the diagnostic plot executable have been removed +(``plot_acceptance_rate`` and ``plot_samples``). This is because these +diagnostics do not work for ``dynesty``, a nested sampler. As above, **set the +nprocesses argument in the** ``[inference]`` **section to the number of cores that +works for your cluster.*** + +Note that we could have run both the ``emcee_pt`` analysis, above, and the +``dynesty`` analysis together in a single workflow. However, to do so, we would +need to remove any diagnostic plots that are unique to each sampler. + +Once you have downloaded the necessary files, create the workflow and launch +it using the same ``create_workflow.sh`` script and ``pycbc_submit_dax`` +commands as above, making sure to change the ``WORKFLOW_NAME`` and ``SEED``. +This will produce a results page that looks like the example +`here `_. 
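As a rough illustration of what this workflow leaves behind once
``pycbc_inference_extract_samples`` has run: a minimal sketch of inspecting the
extracted posterior file, assuming the usual PyCBC inference HDF layout (a
top-level ``samples`` group with one dataset per parameter). The file name and
the ``srcmchirp`` parameter below are placeholders, not names fixed by the
workflow::

    # Minimal sketch: peek at a posterior file produced by the workflow.
    # Assumes a 'samples' group with one dataset per parameter; the file name
    # and parameter name are placeholders.
    import h5py
    import numpy

    with h5py.File('posterior-GW150914.hdf', 'r') as fp:
        print('parameters:', sorted(fp['samples'].keys()))
        mchirp = fp['samples/srcmchirp'][:]
        lo, med, hi = numpy.percentile(mchirp, [5, 50, 95])
        print('srcmchirp = %.2f (+%.2f / -%.2f)' % (med, hi - med, med - lo))

The same file can of course be passed to the plotting executables listed in the
workflow configuration; the snippet is only meant to show the layout the
documentation above refers to.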
From 8a1cc35d941f4d919b543b7a839f7486dd214cd4 Mon Sep 17 00:00:00 2001 From: Koustav Chandra <56917539+KoustavChandra@users.noreply.github.com> Date: Fri, 26 Jun 2020 14:02:45 +0530 Subject: [PATCH 32/68] added support for IMRPhenomHM and IMRPhenomPv3HM length_in_time (#3337) --- pycbc/waveform/waveform.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pycbc/waveform/waveform.py b/pycbc/waveform/waveform.py index f066a1d152f..9a2acaf06a5 100644 --- a/pycbc/waveform/waveform.py +++ b/pycbc/waveform/waveform.py @@ -832,6 +832,8 @@ def imrphenomd_length_in_time(**kwds): _filter_time_lengths["IMRPhenomPv2"] = imrphenomd_length_in_time _filter_time_lengths["IMRPhenomD_NRTidal"] = imrphenomd_length_in_time _filter_time_lengths["IMRPhenomPv2_NRTidal"] = imrphenomd_length_in_time +_filter_time_lengths["IMRPhenomHM"] = imrphenomd_length_in_time +_filter_time_lengths["IMRPhenomPv3HM"] = imrphenomd_length_in_time _filter_time_lengths["SpinTaylorF2"] = spa_length_in_time _filter_time_lengths["TaylorF2NL"] = spa_length_in_time _filter_time_lengths["PreTaylorF2"] = spa_length_in_time From 57ab41c74d3163f262339bb261e0a0202223913c Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Thu, 25 Jun 2020 21:37:44 +0000 Subject: [PATCH 33/68] update the inference inj workflow docs --- .../pycbc_make_inference_inj_workflow.rst | 161 ++++++++++++++---- 1 file changed, 129 insertions(+), 32 deletions(-) diff --git a/docs/workflow/pycbc_make_inference_inj_workflow.rst b/docs/workflow/pycbc_make_inference_inj_workflow.rst index 99f44f2bbda..511c71f6b38 100644 --- a/docs/workflow/pycbc_make_inference_inj_workflow.rst +++ b/docs/workflow/pycbc_make_inference_inj_workflow.rst @@ -6,55 +6,152 @@ Introduction =============== -The executable ``pycbc_make_inference_inj_workflow`` is a workflow generator to setup a parameter estimation analysis. - - -=========================== -Workflow configuration file -=========================== - -A sample workflow configuration file: - -.. literalinclude:: ../../examples/workflow/inference/inj_workflow_config.ini +The executable ``pycbc_make_inference_inj_workflow`` is a workflow generator to +setup a parameter estimation analysis on one or more simulated signals. +Optionally, it can also run a percentile-percentile on the injections it +analyzed. + +The workflow is very similar to the standard inference workflow created by +`pycbc_make_inference_workflow `. The main +differences are: + + * Rather than providing one or more ``[event-{label}]`` sections in the + workflow config file, you provide a single ``[workflow-inference]`` section. + The syntax for this section is very similar to the ``[event]`` section(s) in + the standard workflow, as it sets the configuration files that are used + by ``pycbc_inference``. The difference is that the same settings are used + for all injections. + * When you create the workflow, you either pass it a ``--num-injections`` + or a ``--injection-file``. If the former, the workflow will draw the + specified number of injections from the prior given to ``pycbc_inference`` + and analyze them. If the latter, the workflow will analyze the injections + specified in the given injection file. The file must be an HDF file; + see ``pycbc_create_injections`` for details. In either case, each injection + is treated as an independent event, with its own summary section in the + results page. + * You may optionally have the workflow do a percentile-percentile test on + the injections. 
You do this by adding the necessary executables and + corresponding sections to the ``workflow_config.ini`` file. See the example + below for details. If a percentile-percentile test is done, the results + page will have an additional tab that gives a summary of the PP test on + all of the parameters, as well as PP plots and plots of injected versus + recoverd values. + * It is recommend (though not required) that you add + ``plot-injection-parameters`` to the ``[plot_posterior]`` and + ``[plot_posterior_summary]`` sections. Doing so will cause redlines to + be plotted at the injected parameter values on the posterior plots, so + that you may visually inspect how well the injected values are recovered. + This may also require providing an ``injection-samples-map`` argument. + See the example file below for details. + +In the `standard workflow ` we used two +workflow configuration files, a ``workflow_config.ini`` and an ``events.ini``. +For the injection workflow, we can use the same ``workflow_config.ini``; we +just need to setup an ``injections_config.ini`` to add the needed sections +and arguments for the injections workflow. + +In the example below, we demonstrate how to use the injections workflow +using the same prior and sampler settings as given in the +`standard workflow ` example. + + +======================================== +Example: BBH injections with ``dynesty`` +======================================== + +In this example we use the same prior and sampler settings as the example +of analyzing GW150914 and GW170814 in the +`pycbc_make_inference_workflow ` +documentation. We will analyze 10 injections, and do a percentile-percentile +test on them. (This is only as an example. To do a full PP test, we recommend +using at least 100 injections.) + +------------------------------------- +Get the inference configuration files +------------------------------------- + +We can use the same +:download:`prior <../../examples/inference/priors/bbh-uniform_comoving_volume.ini>`, +:download:`model <../../examples/inference/models/marginalized_phase.ini>`, +and :download:`sampler <../../examples/inference/samplers/dynesty.ini>` +configuration files as used in the +`pycbc_make_inference_workflow ` example. +However, instead of analyzing O1 or O2 data, we will create fake Gaussian +noise. To do that, we will use the +:download:`data.ini <../../examples/inference/bbh-injection/data.ini>` file +used for the `BBH simulation example <../inference/examples/bbh.rst>`. + +------------------------------------- +Setup the workflow configuration file +------------------------------------- + +As discussed above, we can use the same :download:`workflow configuration file <../../examples/workflow/inference/gw150914_gw170814-dynesty/workflow_config.ini>` as used in +the ``dynesty`` example in the standard workflow. We need to create +an ``injections_config.ini`` file to go along with the ``workflow_config.ini``: + +.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-dynesty/injections_config.ini :language: ini -:download:`Download <../../examples/workflow/inference/inj_workflow_config.ini>` - - -============================ -Inference configuration file -============================ -A sample inference configuration file: - -.. 
literalinclude:: ../../examples/workflow/inference/inference.ini - :language: ini +:download:`Download <../../examples/workflow/inference/gw150914_gw170814-dynesty/injections_config.ini>` -:download:`Download <../../examples/workflow/inference/inference.ini>` - -===================== +--------------------- Generate the workflow -===================== +--------------------- -To generate a workflow you will need your configuration files. Generate the workflow using following example run script: +Assuming that you have downloaded all of the configuration files to the +same directory, you can generate the workflow by running the following script: -.. literalinclude:: ../../examples/workflow/inference/run_pycbc_make_inference_inj_workflow.sh +.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-dynesty/create_inj_workflow.sh :language: bash -:download:`Download <../../examples/workflow/inference/run_pycbc_make_inference_inj_workflow.sh>` +:download:`Download <../../examples/workflow/inference/gw150914_gw170814-dynesty/create_inj_workflow.sh>` + +Note that you need to set the ``HTML_DIR`` before running. This tells the +workflow where to save the results page when done. You can also change +``WORKFLOW_NAME`` if you like. +You should also change the ``SEED`` everytime you create a different workflow. +This sets the seed that is passed to ``pycbc_inference`` (you set it here +because it will be incremented for every ``pycbc_inference`` job that will be +run in the workflow). -============================= +After the workflow has finished it will have created a directory named +``${WORKFLOW_NAME}-output``. This contains the ``dax`` and all necessary files +to run the workflow. + +----------------------------- Plan and execute the workflow -============================= +----------------------------- + +Change directory into the ``${WORKFLOW_NAME}-output`` directory:: -If you are on LDG, you need to define an accounting group. Finally plan and submit the workflow with: + cd ${WORKFLOW_NAME}-output -:: +If you are on the ATLAS cluster (at AEI Hannover) or on an LDG cluster, you +need to define an accounting group tag (talk to your cluster admins if you do +not know what this is). Once you know what accounting-group tag to use, plan +and submit the workflow with:: # submit workflow - cd ${output_dir} pycbc_submit_dax --dax ${WORKFLOW_NAME}.dax \ --no-grid \ --enable-shared-filesystem \ --accounting-group ${ACCOUNTING_GROUP} +Here, ``${ACCOUNTING_GROUP}`` is the appropriate tag for your workflow. + +Once it is running, you can monitor the status of the workflow by running +``./status`` from within the ``${WORKFLOW_NAME}-output`` directory. If your +workflow fails for any reason, you can see what caused the failure by running +``./debug``. If you need to stop the workflow at any point, run ``./stop``. +To resume a workflow, run ``./start``. If the ``pycbc_inference`` jobs were +still running, and they had checkpointed, they will resume from their last +checkpoint upon restart. + +------------ +Results page +------------ + +When the workflow has completed successfully it will write out the results +page to the directory you specified in the ``create_inj_workflow.sh`` script. +You can see what the result page will look like `here <>`_. 
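The percentile-percentile test described above boils down to asking, for each
injection, at what credible level the true value is recovered, and then
checking that those levels are uniformly distributed. A minimal sketch of that
check for a single parameter, assuming the injected values and the posterior
samples have already been collected into arrays (this is not the workflow's
actual implementation; the inputs are placeholders)::

    # Minimal sketch of a percentile-percentile (PP) check for one parameter.
    # The workflow gathers these inputs for you; here they are placeholders.
    import numpy
    from scipy import stats

    def pp_pvalue(injected_values, posterior_samples):
        """KS p-value that the recovered credible levels are uniform.

        injected_values: one injected value per injection.
        posterior_samples: for each injection, an array of posterior samples
            of the same parameter.
        """
        levels = numpy.array([(samps < inj).mean()
                              for inj, samps in zip(injected_values,
                                                    posterior_samples)])
        # an unbiased analysis recovers credible levels uniform on [0, 1]
        return stats.kstest(levels, 'uniform').pvalue

Repeating this per parameter gives the table of p-values summarized on the
Percentile-Percentile tab of the results page.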
From 29dff152853d1bf00d37bfde75de652561f644aa Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Thu, 25 Jun 2020 21:42:07 +0000 Subject: [PATCH 34/68] add create_inj and injections_config example files --- .../pycbc_make_inference_inj_workflow.rst | 8 +- .../bbh_inj-dynesty/create_inj_workflow.sh | 20 +++ .../bbh_inj-dynesty/injections_config.ini | 122 ++++++++++++++++++ 3 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 examples/workflow/inference/bbh_inj-dynesty/create_inj_workflow.sh create mode 100644 examples/workflow/inference/bbh_inj-dynesty/injections_config.ini diff --git a/docs/workflow/pycbc_make_inference_inj_workflow.rst b/docs/workflow/pycbc_make_inference_inj_workflow.rst index 511c71f6b38..bd8a195e34b 100644 --- a/docs/workflow/pycbc_make_inference_inj_workflow.rst +++ b/docs/workflow/pycbc_make_inference_inj_workflow.rst @@ -89,10 +89,10 @@ As discussed above, we can use the same :download:`workflow configuration file < the ``dynesty`` example in the standard workflow. We need to create an ``injections_config.ini`` file to go along with the ``workflow_config.ini``: -.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-dynesty/injections_config.ini +.. literalinclude:: ../../examples/workflow/inference/bbh_inj-dynesty/injections_config.ini :language: ini -:download:`Download <../../examples/workflow/inference/gw150914_gw170814-dynesty/injections_config.ini>` +:download:`Download <../../examples/workflow/inference/bbh_inj-dynesty/injections_config.ini>` --------------------- Generate the workflow @@ -101,10 +101,10 @@ Generate the workflow Assuming that you have downloaded all of the configuration files to the same directory, you can generate the workflow by running the following script: -.. literalinclude:: ../../examples/workflow/inference/gw150914_gw170814-dynesty/create_inj_workflow.sh +.. literalinclude:: ../../examples/workflow/inference/bbh_inj-dynesty/create_inj_workflow.sh :language: bash -:download:`Download <../../examples/workflow/inference/gw150914_gw170814-dynesty/create_inj_workflow.sh>` +:download:`Download <../../examples/workflow/inference/bbh_inj-dynesty/create_inj_workflow.sh>` Note that you need to set the ``HTML_DIR`` before running. This tells the workflow where to save the results page when done. You can also change diff --git a/examples/workflow/inference/bbh_inj-dynesty/create_inj_workflow.sh b/examples/workflow/inference/bbh_inj-dynesty/create_inj_workflow.sh new file mode 100644 index 00000000000..a69f183dc1c --- /dev/null +++ b/examples/workflow/inference/bbh_inj-dynesty/create_inj_workflow.sh @@ -0,0 +1,20 @@ +set -e + +WORKFLOW_NAME=bbh_injections-dynesty +# Set the HTML_DIR to point to your public html page. This is where the results +# page will be written. +HTML_DIR='' +if [ "${HTML_DIR}" == '' ]; then + echo "Please set an HTML_DIR" + exit 1 +fi +SEED=983124 +# Set the number of injections to create. For a full PP test, we suggest using +# 100. 
+NINJ=10 +pycbc_make_inference_inj_workflow \ + --seed ${SEED} \ + --num-injections 10 \ + --config-files workflow_config.ini injections_config.ini \ + --workflow-name ${WORKFLOW_NAME} \ + --config-overrides results_page:output-path:${HTML_DIR}/${WORKFLOW_NAME} diff --git a/examples/workflow/inference/bbh_inj-dynesty/injections_config.ini b/examples/workflow/inference/bbh_inj-dynesty/injections_config.ini new file mode 100644 index 00000000000..7a6130b1b86 --- /dev/null +++ b/examples/workflow/inference/bbh_inj-dynesty/injections_config.ini @@ -0,0 +1,122 @@ +[workflow-inference] +; The inference configuration file(s) and any overrides to use for all of the +; injections. +; If no injection file is provided on the command line, injections will be +; drawn from the prior specified in the inference config file +config-files = bbh-uniform_comoving_volume.ini + marginalized_phase.ini + emcee_pt-srcmasses_comoving_volume.ini + data.ini +; As with events sections in the standard workflow, you can specify +; config-overrides for the above file(s). Here, we will change the prior from +; uniform in comoving volume to uniform in the log10 of the comoving volume. +; We'll do this so as to get a distribution of injections with appreciable +; SNR. (A uniform in comoving volume prior leads to most of the injections +; having SNR < 7, and so we mostly end up seeing prior-in, prior-out.) +config-overrides = prior-comoving_volume:name:uniform_log10 +; Optionally, you may also specify the number of times to run inference on +; each injection by setting nruns. Each run will use different random seeds, +; and samples from the runs will be combined into a single posterior file for +; each injection. Not setting this is equivalent to nruns = 1 +;nruns = 1 + +; For the injection workflow, we need to add an executable to create the +; injections. Optionally, we may also add executables to perform +; percentile-percentile (pp) tests. +[executables] +create_injections = ${which:pycbc_create_injections} +; Executables for percentile-percentile test. These are optional. If you do +; not include them in this section, no PP test will be done. If you do +; include them, all 3 must be included. +pp_table_summary = ${which:pycbc_inference_pp_table_summary} +plot_pp = ${which:pycbc_inference_plot_pp} +inj_recovery = ${which:pycbc_inference_plot_inj_recovery} +; We do not need to provide any of the other executables since they are +; specified in workflow_config.ini. When this file is combined with +; workflow_config.ini, these options are automatically added to the +; [executables] section in that file. + + +[workflow-pp_test] +; Since we have included the executables to make the PP plots, we need to +; provide this section. +; The pp-params option specifies what parameters to perform the percentile- +; percentile test on. If you do not provide anything, all parameters +; in the posterior file will be used (that is set by the parameters +; argument in the [extract_posterior] section in workflow_config.ini). A +; p-value of p-values will be calculated for all parameters and reported +; in the summary table on the Percentile-Percentile table of the results +; page. We therefore do not want to include all parameters in the posterior +; file, since we have added parameters that are derived from the others in +; [extract_posterior] section. 
For this reason, we manually list all the +; parameters we want to do the pp test on here: +pp-params = delta_tc srcmass1 srcmass2 spin1_a spin1_azimuthal spin1_polar + spin2_a spin2_azimuthal spin2_polar distance inclination + polarization ra dec +; In order to do the PP test, the code needs to know what parameters in the +; posterior correspond to which injection parameters. Since we have applied +; some functions to the samples parameters when creating the posterior file +; (again, refer to the [extract_posterior] section in workflow_config.ini), +; the mapping between posterior parameters and injection parameters is no +; longer a 1:1. To tell the code how to map from the injections parameters +; the posterior parameters, we provide the following injection-samples-map. +; We can just copy most of the parameters argument from the [extract_posterior] +; section for this (we can't just do a reference because the wildcard (*) that +; is there is not understood by the injection-samples-map option. +injection-samples-map = 'primary_mass(srcmass1, srcmass2):srcmass1' + 'secondary_mass(srcmass1, srcmass2):srcmass2' + 'primary_spin(srcmass1, srcmass2, spin1_a, spin2_a):spin1_a' + 'primary_spin(srcmass1, srcmass2, spin1_azimuthal, spin2_azimuthal):spin1_azimuthal' + 'primary_spin(srcmass1, srcmass2, spin1_polar, spin2_polar):spin1_polar' + 'secondary_spin(srcmass1, srcmass2, spin1_a, spin2_a):spin2_a' + 'secondary_spin(srcmass1, srcmass2, spin1_azimuthal, spin2_azimuthal):spin2_azimuthal' + 'secondary_spin(srcmass1, srcmass2, spin1_polar, spin2_polar):spin2_polar' + 'mchirp_from_mass1_mass2(srcmass1, srcmass2):srcmchirp' + 'chi_eff_from_spherical(srcmass1, srcmass2, spin1_a, spin1_polar, spin2_a, spin2_polar):chi_eff' + 'chi_p_from_spherical(srcmass1, srcmass2, spin1_a, spin1_azimuthal, spin1_polar, spin2_a, spin2_azimuthal, spin2_polar):chi_p' + 'redshift_from_comoving_volume(comoving_volume):redshift' + 'distance_from_comoving_volume(comoving_volume):distance' +; Notice that we can provide more parameters to the injection-samples-map then +; what we will be using in the PP test. This is fine, extra parameters are +; just ignored. By providing all of the parameters here, we can re-use this +; argument for the posterior plots (see below). + +[create_injections] +; Options for the create_injections executable. Do not provide a config file +; nor the number of injections to create here. The inference +; config file is used for generating injections, and the number is determined +; by the command-line options given to make_inference_inj_workflow + +[plot_posterior_summary] +; Adding plot-injection-parameters will cause a red line to be plotted on +; the posterior plots showing the injection parameters. +plot-injection-parameters = +; In order for the redline to be plotted in the right place, we have to +; provide an injection samples map. We can just use what was used in the +; workflow-pp_test section. +injection-samples-map = ${workflow-pp_test|injection-samples-map} +; We do not need to provide any arguments, as the rest are set in +; workflow_config.ini. + +[plot_posterior] +; Do the same for the full corner plots. +plot-injection-parameters = +injection-samples-map = ${workflow-pp_test|injection-samples-map} +; We do not need to provide any arguments, as the rest are set in +; workflow_config.ini. 
+ +[pp_table_summary] +; command line options for percentile-percentile table summary +; do not provide parameters or injection-samples map here, as that is read +; from the [workflow-pp_test] section + +[plot_pp] +; command line options for percentile-percentile plot +; do not provide parameters or injection-samples map here, as that is read +; from the [workflow-pp_test] section + +[inj_recovery] +; command line options for injection recovery plots +; do not provide parameters or injection-samples map here, as that is read +; from the [workflow-pp_test] section + From e38ec2e2b7d91b6eb05d619c25210bd87fb77cbb Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Thu, 25 Jun 2020 21:57:57 +0000 Subject: [PATCH 35/68] fix typo --- .../workflow/inference/bbh_inj-dynesty/injections_config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/workflow/inference/bbh_inj-dynesty/injections_config.ini b/examples/workflow/inference/bbh_inj-dynesty/injections_config.ini index 7a6130b1b86..b9aa23d756e 100644 --- a/examples/workflow/inference/bbh_inj-dynesty/injections_config.ini +++ b/examples/workflow/inference/bbh_inj-dynesty/injections_config.ini @@ -5,7 +5,7 @@ ; drawn from the prior specified in the inference config file config-files = bbh-uniform_comoving_volume.ini marginalized_phase.ini - emcee_pt-srcmasses_comoving_volume.ini + dynesty.ini data.ini ; As with events sections in the standard workflow, you can specify ; config-overrides for the above file(s). Here, we will change the prior from From fa548c8c0b73c990092e7fc0f60ebaf8a374754f Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Thu, 25 Jun 2020 22:02:07 +0000 Subject: [PATCH 36/68] more typos --- docs/workflow/pycbc_make_inference_inj_workflow.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/workflow/pycbc_make_inference_inj_workflow.rst b/docs/workflow/pycbc_make_inference_inj_workflow.rst index bd8a195e34b..cab9be44cd7 100644 --- a/docs/workflow/pycbc_make_inference_inj_workflow.rst +++ b/docs/workflow/pycbc_make_inference_inj_workflow.rst @@ -12,7 +12,7 @@ Optionally, it can also run a percentile-percentile on the injections it analyzed. The workflow is very similar to the standard inference workflow created by -`pycbc_make_inference_workflow `. The main +`pycbc_make_inference_workflow `_. The main differences are: * Rather than providing one or more ``[event-{label}]`` sections in the @@ -44,7 +44,7 @@ differences are: This may also require providing an ``injection-samples-map`` argument. See the example file below for details. -In the `standard workflow ` we used two +In the `standard workflow `_ we used two workflow configuration files, a ``workflow_config.ini`` and an ``events.ini``. For the injection workflow, we can use the same ``workflow_config.ini``; we just need to setup an ``injections_config.ini`` to add the needed sections @@ -52,7 +52,7 @@ and arguments for the injections workflow. In the example below, we demonstrate how to use the injections workflow using the same prior and sampler settings as given in the -`standard workflow ` example. +`standard workflow `_ example. ======================================== @@ -61,7 +61,7 @@ Example: BBH injections with ``dynesty`` In this example we use the same prior and sampler settings as the example of analyzing GW150914 and GW170814 in the -`pycbc_make_inference_workflow ` +`pycbc_make_inference_workflow `_ documentation. We will analyze 10 injections, and do a percentile-percentile test on them. 
(This is only as an example. To do a full PP test, we recommend using at least 100 injections.) @@ -75,11 +75,11 @@ We can use the same :download:`model <../../examples/inference/models/marginalized_phase.ini>`, and :download:`sampler <../../examples/inference/samplers/dynesty.ini>` configuration files as used in the -`pycbc_make_inference_workflow ` example. +`pycbc_make_inference_workflow `_ example. However, instead of analyzing O1 or O2 data, we will create fake Gaussian noise. To do that, we will use the :download:`data.ini <../../examples/inference/bbh-injection/data.ini>` file -used for the `BBH simulation example <../inference/examples/bbh.rst>`. +used for the `BBH simulation example <../inference/examples/bbh.rst>`_. ------------------------------------- Setup the workflow configuration file From 46e82666104072492e1771d80f6e2dc396a1f964 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Thu, 25 Jun 2020 22:14:14 +0000 Subject: [PATCH 37/68] fix broken links --- docs/workflow/pycbc_make_inference_inj_workflow.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/workflow/pycbc_make_inference_inj_workflow.rst b/docs/workflow/pycbc_make_inference_inj_workflow.rst index cab9be44cd7..9757b3b9478 100644 --- a/docs/workflow/pycbc_make_inference_inj_workflow.rst +++ b/docs/workflow/pycbc_make_inference_inj_workflow.rst @@ -12,7 +12,7 @@ Optionally, it can also run a percentile-percentile on the injections it analyzed. The workflow is very similar to the standard inference workflow created by -`pycbc_make_inference_workflow `_. The main +`pycbc_make_inference_workflow `_. The main differences are: * Rather than providing one or more ``[event-{label}]`` sections in the @@ -44,7 +44,7 @@ differences are: This may also require providing an ``injection-samples-map`` argument. See the example file below for details. -In the `standard workflow `_ we used two +In the `standard workflow `_ we used two workflow configuration files, a ``workflow_config.ini`` and an ``events.ini``. For the injection workflow, we can use the same ``workflow_config.ini``; we just need to setup an ``injections_config.ini`` to add the needed sections @@ -52,7 +52,7 @@ and arguments for the injections workflow. In the example below, we demonstrate how to use the injections workflow using the same prior and sampler settings as given in the -`standard workflow `_ example. +`standard workflow `_ example. ======================================== @@ -61,7 +61,7 @@ Example: BBH injections with ``dynesty`` In this example we use the same prior and sampler settings as the example of analyzing GW150914 and GW170814 in the -`pycbc_make_inference_workflow `_ +`pycbc_make_inference_workflow `_ documentation. We will analyze 10 injections, and do a percentile-percentile test on them. (This is only as an example. To do a full PP test, we recommend using at least 100 injections.) @@ -75,11 +75,11 @@ We can use the same :download:`model <../../examples/inference/models/marginalized_phase.ini>`, and :download:`sampler <../../examples/inference/samplers/dynesty.ini>` configuration files as used in the -`pycbc_make_inference_workflow `_ example. +`pycbc_make_inference_workflow `_ example. However, instead of analyzing O1 or O2 data, we will create fake Gaussian noise. To do that, we will use the :download:`data.ini <../../examples/inference/bbh-injection/data.ini>` file -used for the `BBH simulation example <../inference/examples/bbh.rst>`_. 
+used for the `BBH simulation example <../inference/examples/bbh>`_. ------------------------------------- Setup the workflow configuration file From 615c21677af97d7627cd15c806a4eba45e32e153 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Fri, 26 Jun 2020 06:08:12 +0000 Subject: [PATCH 38/68] fix typo --- docs/workflow/pycbc_make_inference_workflow.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/workflow/pycbc_make_inference_workflow.rst b/docs/workflow/pycbc_make_inference_workflow.rst index 8f121d16dca..2d5a33950ec 100644 --- a/docs/workflow/pycbc_make_inference_workflow.rst +++ b/docs/workflow/pycbc_make_inference_workflow.rst @@ -229,9 +229,9 @@ page to the directory you specified in the ``create_workflow.sh`` script. You can see what the result page will look like `here `_. -============================================= -Example: GW150914 and GW170814 with `dynesty` -============================================= +=============================================== +Example: GW150914 and GW170814 with ``dynesty`` +=============================================== In this example, we repeat the above analysis, but using the `dynesty` sampler. We can use the same From 15bf964cb5ae47bd59ebe8ff9d106a982a8f014d Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Fri, 26 Jun 2020 06:08:24 +0000 Subject: [PATCH 39/68] add link to results page --- docs/workflow/pycbc_make_inference_inj_workflow.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflow/pycbc_make_inference_inj_workflow.rst b/docs/workflow/pycbc_make_inference_inj_workflow.rst index 9757b3b9478..50f3b809038 100644 --- a/docs/workflow/pycbc_make_inference_inj_workflow.rst +++ b/docs/workflow/pycbc_make_inference_inj_workflow.rst @@ -154,4 +154,4 @@ Results page When the workflow has completed successfully it will write out the results page to the directory you specified in the ``create_inj_workflow.sh`` script. -You can see what the result page will look like `here <>`_. +You can see what the result page will look like `here `_. From 15afc4b7ea2b7653122c24ce6d356a1bf8eca0fc Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Mon, 29 Jun 2020 14:31:32 +0200 Subject: [PATCH 40/68] Update timeseries.py (#3346) --- pycbc/types/timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycbc/types/timeseries.py b/pycbc/types/timeseries.py index ecb5b59d48a..a40a092c192 100644 --- a/pycbc/types/timeseries.py +++ b/pycbc/types/timeseries.py @@ -451,7 +451,7 @@ def save_to_wav(self, file_name): The output file name """ scaled = _numpy.int16(self.numpy()/max(abs(self)) * 32767) - write_wav(file_name, self.sample_rate, scaled) + write_wav(file_name, int(self.sample_rate), scaled) def psd(self, segment_duration, **kwds): """ Calculate the power spectral density of this time series. 
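The cast added above is needed because ``TimeSeries.sample_rate`` can be a
float while WAV writers generally expect an integer rate; the hunk does not
show which writer ``write_wav`` wraps, so treat the scipy call below as an
assumption. A standalone sketch of the same pattern, with a placeholder output
file name::

    # Standalone sketch of the fix: cast the (possibly float) sample rate to
    # int before handing it to a WAV writer. scipy.io.wavfile.write is used
    # here as a stand-in for write_wav; the file name is a placeholder.
    import numpy
    from scipy.io.wavfile import write as write_wav

    sample_rate = 4096.0                      # may arrive as a float
    t = numpy.arange(0, 1, 1.0 / sample_rate)
    data = numpy.sin(2 * numpy.pi * 440.0 * t)
    scaled = numpy.int16(data / numpy.abs(data).max() * 32767)
    write_wav('tone.wav', int(sample_rate), scaled)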
From b147550eaa324ac7b90f6a0337db16b4b21872b8 Mon Sep 17 00:00:00 2001 From: Ian Harry Date: Mon, 29 Jun 2020 13:32:44 +0100 Subject: [PATCH 41/68] Allow frame types that change with time (#3342) * Add time dependent frame-type option in datafind.py * Allow time-variable frame type * Resolving whitespace concerns * Changes on the PR in response to feedback --- pycbc/workflow/datafind.py | 79 ++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/pycbc/workflow/datafind.py b/pycbc/workflow/datafind.py index deae7d48ccf..b98a1e9d73b 100644 --- a/pycbc/workflow/datafind.py +++ b/pycbc/workflow/datafind.py @@ -496,23 +496,68 @@ def setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs, outputDir, logging.info("Querying datafind server for all science segments.") for ifo, scienceSegsIfo in scienceSegs.items(): observatory = ifo[0].upper() - frameType = cp.get_opt_tags("workflow-datafind", - "datafind-%s-frame-type" % (ifo.lower()), tags) - # This REQUIRES a coalesced segment list to work - startTime = int(scienceSegsIfo[0][0]) - endTime = int(scienceSegsIfo[-1][1]) - try: - cache, cache_file = run_datafind_instance(cp, outputDir, connection, - observatory, frameType, startTime, - endTime, ifo, tags=tags) - except: - connection = setup_datafind_server_connection(cp, tags=tags) - cache, cache_file = run_datafind_instance(cp, outputDir, connection, - observatory, frameType, startTime, - endTime, ifo, tags=tags) - - datafindouts.append(cache_file) - datafindcaches.append(cache) + checked_times = segments.segmentlist([]) + frame_types = cp.get_opt_tags( + "workflow-datafind", + "datafind-%s-frame-type" % (ifo.lower()), tags + ) + # Check if this is one type, or time varying + frame_types = frame_types.replace(' ', '').strip().split(',') + for ftype in frame_types: + # Check the times, default to full time initially + # This REQUIRES a coalesced segment list to work + start = int(scienceSegsIfo[0][0]) + end = int(scienceSegsIfo[-1][1]) + # Then check for limits. We're expecting something like: + # value[start:end], so need to extract value, start and end + if '[' in ftype: + # This gets start and end out + bopt = ftype.split('[')[1].split(']')[0] + newstart, newend = bopt.split(':') + # Then check if the times are within science time + start = max(int(newstart), start) + end = min(int(newend), end) + if end <= start: + continue + # This extracts value + ftype = ftype.split('[')[0] + curr_times = segments.segment(start, end) + # The times here must be distinct. We cannot have two different + # frame files at the same time from the same ifo. + if checked_times.intersects_segment(curr_times): + err_msg = "Different frame types cannot overlap in time." 
+ raise ValueError(err_msg) + checked_times.append(curr_times) + + # Ask datafind where the frames are + try: + cache, cache_file = run_datafind_instance( + cp, + outputDir, + connection, + observatory, + ftype, + start, + end, + ifo, + tags=tags + ) + except: + connection = setup_datafind_server_connection(cp, tags=tags) + cache, cache_file = run_datafind_instance( + cp, + outputDir, + connection, + observatory, + ftype, + start, + end, + ifo, + tags=tags + ) + + datafindouts.append(cache_file) + datafindcaches.append(cache) return datafindcaches, datafindouts def setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs, From 65bef88fba50f329bb78b8f165f52ae0caca884f Mon Sep 17 00:00:00 2001 From: Ian Harry Date: Tue, 30 Jun 2020 16:15:47 +0100 Subject: [PATCH 42/68] Allow options that depend on time (#3344) * Implement the idea of time-dependent options * Support TDOs in inspiral jobs * Support TDOs in minifollowups * Pull off into dedicated function * Calculate_psd can use dedicated function directly * Typo fix * Fixing up CC, one bug, and a few issues * Missed this --- bin/hdfcoinc/pycbc_calculate_psd | 14 +++-- pycbc/workflow/core.py | 87 +++++++++++++++++++++++++++++++- pycbc/workflow/jobsetup.py | 3 +- pycbc/workflow/minifollowups.py | 27 +++++++--- 4 files changed, 117 insertions(+), 14 deletions(-) diff --git a/bin/hdfcoinc/pycbc_calculate_psd b/bin/hdfcoinc/pycbc_calculate_psd index 069441fcefe..c89b53bbe75 100755 --- a/bin/hdfcoinc/pycbc_calculate_psd +++ b/bin/hdfcoinc/pycbc_calculate_psd @@ -1,12 +1,13 @@ #!/usr/bin/env python """ Calculate psd estimates for analysis segments """ -import logging, argparse, numpy, h5py, multiprocessing, time +import logging, argparse, numpy, h5py, multiprocessing, time, copy from six.moves import (range, zip_longest) import pycbc, pycbc.psd, pycbc.strain, pycbc.events from pycbc.version import git_verbose_msg as version from pycbc.fft.fftw import set_measure_level -from ligo.segments import segmentlist +from pycbc.workflow import resolve_td_option +from ligo.segments import segmentlist, segment set_measure_level(0) parser = argparse.ArgumentParser(description=__doc__) @@ -43,13 +44,16 @@ def get_psd(input_tuple): logging.info('%d: getting strain for %.1f-%.1f (%.1f s)', i, seg[0], seg[1], abs(seg)) - args.gps_start_time = int(seg[0]) + args.pad_data - args.gps_end_time = int(seg[1]) - args.pad_data + argstmp = copy.deepcopy(args) + argstmp.gps_start_time = int(seg[0]) + args.pad_data + argstmp.gps_end_time = int(seg[1]) - args.pad_data + tmp_segment = segment([argstmp.gps_start_time, argstmp.gps_end_time]) + argstmp.channel_name = resolve_td_option(args.channel_name, tmp_segment) # This helps when the filesystem is unreliable, and gives extra retries. # python has an internal limit of ~100 (it is not infinite) try: - gwstrain = pycbc.strain.from_cli(args, pycbc.DYN_RANGE_FAC) + gwstrain = pycbc.strain.from_cli(argstmp, pycbc.DYN_RANGE_FAC) except RuntimeError: time.sleep(10) return get_psd((seg, i)) diff --git a/pycbc/workflow/core.py b/pycbc/workflow/core.py index e7579e8ae45..4e73725bdee 100644 --- a/pycbc/workflow/core.py +++ b/pycbc/workflow/core.py @@ -122,6 +122,16 @@ class Executable(pegasus_workflow.Executable): # file_input_options = ['--psd-file, '--bank-file'] (as an example) file_input_options = [] + # Set this parameter to indicate that this option should take different + # values based on the time. E.g. 
something like + # --option1 value1[0:1000],value2[1000:2000] + # would be replaced with --option1 value1 if the time is within 0,1000 and + # value2 if in 1000,2000. A failure will be replaced if the job time is + # not fully contained in one of these windows, or if fully contained in + # multiple of these windows. This is resolved when creating the Job from + # the Executable + time_dependent_options = [] + # This is the default value. It will give a warning if a class is # used where the retention level is not set. The file will still be stored KEEP_BUT_RAISE_WARNING = 5 @@ -382,6 +392,13 @@ def add_ini_opts(self, cp, sec): else: self.common_raw_options.append(curr_file.dax_repr) self.common_raw_options.append(' ') + elif opt in self.time_dependent_options: + # There is a possibility of time-dependent, file options. + # For now we will avoid supporting that complication unless + # it is needed. This would require resolving the file first + # in this function, and then dealing with the time-dependent + # stuff later. + self.unresolved_td_options[opt] = value else: self.common_options += [opt, value] @@ -567,6 +584,7 @@ def update_current_tags(self, tags): # from the ini file section(s) self.common_options = [] self.common_raw_options = [] + self.unresolved_td_options = {} self.common_input_files = [] for sec in sections: if self.cp.has_section(sec): @@ -843,10 +861,11 @@ def save_config(self, fname, output_dir, cp=None): class Node(pegasus_workflow.Node): - def __init__(self, executable): + def __init__(self, executable, valid_seg=None): super(Node, self).__init__(executable) self.executed = False self.set_category(executable.name) + self.valid_seg = valid_seg if executable.universe == 'vanilla' and executable.installed: self.add_profile('condor', 'getenv', 'True') @@ -859,6 +878,15 @@ def __init__(self, executable): for inp in self.executable.common_input_files: self._add_input(inp) + if len(self.executable.time_dependent_options): + # Resolving these options requires the concept of a valid time. + # To keep backwards compatibility we will allow this to work if + # valid_seg is not supplied and no option actually needs resolving. + # It would be good to get this from the workflow's valid_seg if + # not overriden. But the Node is not connected to the Workflow + # until the dax starts to be written. + self.resolve_td_options(self.executable.unresolved_td_options) + def get_command_line(self): self._finalize() arglist = self._dax_node.arguments @@ -985,6 +1013,10 @@ def new_multiifo_output_list_opt(self, opt, ifos, analysis_time, extension, output_files.append(curr_file) self.add_multiifo_output_list_opt(opt, output_files) + def resolve_td_options(self, td_options): + for opt in td_options: + new_opt = resolve_td_option(td_options[opt], self.valid_seg) + self._options += [opt, new_opt] @property def output_files(self): @@ -1003,6 +1035,7 @@ def output_file(self): raise ValueError(err_msg) return out_files[0] + class File(pegasus_workflow.File): ''' This class holds the details of an individual output file @@ -2006,6 +2039,58 @@ def get_random_label(): for _ in range(15)) +def resolve_td_option(val_str, valid_seg): + """ + Take an option which might be time-dependent and resolve it + + Some options might take different values depending on the GPS time. For + example if you want opt_1 to take value_a if the time is between 10 and + 100, value_b if between 100 and 250, and value_c if between 250 and 500 you + can supply: + + value_a[10:100],value_b[100:250],value_c[250:500]. 
+ + This function will parse that string (as opt) and return the value fully + contained in valid_seg. If valid_seg is not full contained in one, and only + one, of these options. The code will fail. If given a simple option like: + + value_a + + The function will just return value_a. + """ + # Track if we've already found a matching option + output = '' + # Strip any whitespace, and split on comma + curr_vals = val_str.replace(' ', '').strip().split(',') + + # Resolving the simple case is trivial and can be done immediately. + if len(curr_vals) == 1 and '[' not in curr_vals[0]: + return curr_vals[0] + + # Loop over all possible values + for cval in curr_vals: + start = int(valid_seg[0]) + end = int(valid_seg[1]) + # Extract limits for each case, and check overlap with valid_seg + if '[' in cval: + bopt = cval.split('[')[1].split(']')[0] + start, end = bopt.split(':') + cval = cval.replace('[' + bopt + ']', '') + curr_seg = segments.segment(int(start), int(end)) + # The segments module is a bit weird so we need to check if the two + # overlap using the following code. If valid_seg is fully within + # curr_seg this will be true. + if curr_seg.intersects(valid_seg) and \ + (curr_seg & valid_seg == valid_seg): + if output: + err_msg = "Time-dependent options must be disjoint." + raise ValueError(err_msg) + output = cval + if not output: + err_msg = "Could not resolve option {}".format(val_str) + raise ValueError + return output + def add_workflow_settings_cli(parser, include_subdax_opts=False): """Adds workflow options to an argument parser. diff --git a/pycbc/workflow/jobsetup.py b/pycbc/workflow/jobsetup.py index 9f68860f391..0f305b35968 100644 --- a/pycbc/workflow/jobsetup.py +++ b/pycbc/workflow/jobsetup.py @@ -625,6 +625,7 @@ class PyCBCInspiralExecutable(Executable): current_retention_level = Executable.ALL_TRIGGERS file_input_options = ['--gating-file'] + time_dependent_options = ['--channel-name'] def __init__(self, cp, exe_name, ifo=None, out_dir=None, injection_file=None, tags=None, reuse_executable=False): @@ -653,7 +654,7 @@ def __init__(self, cp, exe_name, ifo=None, out_dir=None, def create_node(self, data_seg, valid_seg, parent=None, dfParents=None, tags=None): if tags is None: tags = [] - node = Node(self) + node = Node(self, valid_seg=valid_seg) if not self.has_opt('pad-data'): raise ValueError("The option pad-data is a required option of " "%s. Please check the ini file." % self.name) diff --git a/pycbc/workflow/minifollowups.py b/pycbc/workflow/minifollowups.py index cbde9c42162..9e938a65932 100644 --- a/pycbc/workflow/minifollowups.py +++ b/pycbc/workflow/minifollowups.py @@ -18,6 +18,7 @@ from six.moves.urllib.request import pathname2url from six.moves.urllib.parse import urljoin import distutils.spawn +from ligo import segments from pycbc.workflow.core import Executable, FileList, Node, makedir, File, Workflow from pycbc.workflow.plotting import PlotExecutable, requirestr, excludestr try: @@ -344,6 +345,7 @@ class SingleTemplateExecutable(PlotExecutable): PlotExecutable but adds the file_input_options. """ file_input_options = ['--gating-file'] + time_dependent_options = ['--channel-name'] class SingleTimeFreqExecutable(PlotExecutable): @@ -352,6 +354,8 @@ class SingleTimeFreqExecutable(PlotExecutable): PlotExecutable but adds the file_input_options. 
""" file_input_options = ['--gating-file'] + time_dependent_options = ['--channel-name'] + class PlotQScanExecutable(PlotExecutable): """Class to be used for to create workflow.Executable instances for the @@ -359,6 +363,7 @@ class PlotQScanExecutable(PlotExecutable): PlotExecutable but adds the file_input_options. """ file_input_options = ['--gating-file'] + time_dependent_options = ['--channel-name'] def make_single_template_plots(workflow, segs, data_read_name, analyzed_name, @@ -429,9 +434,14 @@ def make_single_template_plots(workflow, segs, data_read_name, analyzed_name, if params['%s_end_time' % ifo] == -1.0: continue # Reanalyze the time around the trigger in each detector - node = SingleTemplateExecutable(workflow.cp, 'single_template', - ifos=[ifo], out_dir=out_dir, - tags=[tag] + tags).create_node() + curr_exe = SingleTemplateExecutable(workflow.cp, 'single_template', + ifos=[ifo], out_dir=out_dir, + tags=[tag] + tags) + start = int(params[ifo + '_end_time']) + end = start + 1 + cseg = segments.segment([start, end]) + node = curr_exe.create_node(valid_seg=cseg) + if use_exact_inj_params: node.add_opt('--use-params-of-closest-injection') else: @@ -650,12 +660,13 @@ def make_qscan_plot(workflow, ifo, trig_time, out_dir, injection_file=None, curr_exe = PlotQScanExecutable(workflow.cp, name, ifos=[ifo], out_dir=out_dir, tags=tags) - node = curr_exe.create_node() # Determine start/end times, using data segments if needed. # Begin by choosing "optimal" times start = trig_time - time_window end = trig_time + time_window + node = curr_exe.create_node(valid_seg=segments.segment([start, end])) + # Then if data_segments is available, check against that, and move if # needed if data_segments is not None: @@ -747,14 +758,16 @@ def make_singles_timefreq(workflow, single, bank_file, trig_time, out_dir, curr_exe = SingleTimeFreqExecutable(workflow.cp, name, ifos=[single.ifo], out_dir=out_dir, tags=tags) - node = curr_exe.create_node() - node.add_input_opt('--trig-file', single) - node.add_input_opt('--bank-file', bank_file) # Determine start/end times, using data segments if needed. # Begin by choosing "optimal" times start = trig_time - time_window end = trig_time + time_window + + node = curr_exe.create_node(valid_seg=segments.segment([start, end])) + node.add_input_opt('--trig-file', single) + node.add_input_opt('--bank-file', bank_file) + # Then if data_segments is available, check against that, and move if # needed if data_segments is not None: From 2b600ad7e65c72432a284722492bd7a9f9b1a240 Mon Sep 17 00:00:00 2001 From: Ian Harry Date: Thu, 2 Jul 2020 11:48:59 +0100 Subject: [PATCH 43/68] Sieve times when frame reading (#3348) * Sieve only points in time range * Is INT4 not LIGOTimeGPS * math.ceil * Need to catch 0 or -ve duration earlier --- pycbc/frame/frame.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pycbc/frame/frame.py b/pycbc/frame/frame.py index 6656a3e522f..973b29dfe16 100644 --- a/pycbc/frame/frame.py +++ b/pycbc/frame/frame.py @@ -20,6 +20,7 @@ import lalframe, logging import lal import numpy +import math import os.path, glob, time import gwdatafind from six.moves.urllib.parse import urlparse @@ -191,6 +192,12 @@ def read_frame(location, channels, start_time=None, if sieve: logging.info("Using frames that match regexp: %s", sieve) lal.CacheSieve(cum_cache, 0, 0, None, None, sieve) + if start_time is not None and end_time is not None: + # Before sieving, check if this is sane. Otherwise it will fail later. 
+ if (int(math.ceil(end_time)) - int(start_time)) <= 0: + raise ValueError("Negative or null duration") + lal.CacheSieve(cum_cache, int(start_time), int(math.ceil(end_time)), + None, None, None) stream = lalframe.FrStreamCacheOpen(cum_cache) stream.mode = lalframe.FR_STREAM_VERBOSE_MODE From 9da9e2febe21c93d8ceea8b5a56a867b26d3b1ee Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Fri, 3 Jul 2020 17:47:34 +0200 Subject: [PATCH 44/68] Add duration support for Phenom HM waveforms (#3350) * add duration support for phenom hm waveforms * estimate by scaling frequency --- pycbc/waveform/waveform.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/pycbc/waveform/waveform.py b/pycbc/waveform/waveform.py index 9a2acaf06a5..86696a1abd8 100644 --- a/pycbc/waveform/waveform.py +++ b/pycbc/waveform/waveform.py @@ -801,6 +801,22 @@ def imrphenomd_length_in_time(**kwds): """ return get_imr_length("IMRPhenomD", **kwds) +def imrphenomhm_length_in_time(**kwargs): + """Estimates the duration of IMRPhenom waveforms that include higher modes. + """ + if 'mode_array' in kwargs and kwargs['mode_array'] is not None: + maxm = max(m for _, m in kwargs['mode_array']) + else: + # the highest m for all of these is 4 (from the 4,4 mode) + maxm = 4 + # we'll use the PhenomD length, with the frequency scaled by 2/m + try: + flow = kwargs['f_lower'] + except KeyError: + raise ValueError("must provide a f_lower") + kwargs['f_lower'] = flow * 2./maxm + return get_imr_length("IMRPhenomD", **kwargs) + _filter_norms["SPAtmplt"] = spa_tmplt_norm _filter_preconditions["SPAtmplt"] = spa_tmplt_precondition @@ -832,8 +848,10 @@ def imrphenomd_length_in_time(**kwds): _filter_time_lengths["IMRPhenomPv2"] = imrphenomd_length_in_time _filter_time_lengths["IMRPhenomD_NRTidal"] = imrphenomd_length_in_time _filter_time_lengths["IMRPhenomPv2_NRTidal"] = imrphenomd_length_in_time -_filter_time_lengths["IMRPhenomHM"] = imrphenomd_length_in_time -_filter_time_lengths["IMRPhenomPv3HM"] = imrphenomd_length_in_time +_filter_time_lengths["IMRPhenomHM"] = imrphenomhm_length_in_time +_filter_time_lengths["IMRPhenomPv3HM"] = imrphenomhm_length_in_time +_filter_time_lengths["IMRPhenomXHM"] = imrphenomhm_length_in_time +_filter_time_lengths["IMRPhenomXPHM"] = imrphenomhm_length_in_time _filter_time_lengths["SpinTaylorF2"] = spa_length_in_time _filter_time_lengths["TaylorF2NL"] = spa_length_in_time _filter_time_lengths["PreTaylorF2"] = spa_length_in_time From 272e376b7e1a4f81212dec4db318f7b79a5607fe Mon Sep 17 00:00:00 2001 From: Gareth S Davies Date: Fri, 3 Jul 2020 18:09:58 +0200 Subject: [PATCH 45/68] get hdfinjfind to work on multiifo_statmap files (#3343) --- bin/hdfcoinc/pycbc_coinc_hdfinjfind | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bin/hdfcoinc/pycbc_coinc_hdfinjfind b/bin/hdfcoinc/pycbc_coinc_hdfinjfind index a75f80b1d99..63e815e4c3e 100755 --- a/bin/hdfcoinc/pycbc_coinc_hdfinjfind +++ b/bin/hdfcoinc/pycbc_coinc_hdfinjfind @@ -255,10 +255,14 @@ for trigger_file, injection_file in zip(args.trigger_files, continue if key not in fo: fo.create_group(key) - fo[key].attrs['pivot'] = f[key].attrs['pivot'] - fo[key].attrs['fixed'] = f[key].attrs['fixed'] - fo[key].attrs['foreground_time'] = f[key].attrs['foreground_time'] - fo[key].attrs['foreground_time_exc'] = f[key].attrs['foreground_time_exc'] + if key in f: + fkey = f[key] + else: + fkey = f + fo[key].attrs['pivot'] = fkey.attrs['pivot'] + fo[key].attrs['fixed'] = fkey.attrs['fixed'] + 
fo[key].attrs['foreground_time'] = fkey.attrs['foreground_time'] + fo[key].attrs['foreground_time_exc'] = fkey.attrs['foreground_time_exc'] else: fo.attrs['detector_1'] = f.attrs['detector_1'] fo.attrs['detector_2'] = f.attrs['detector_2'] From edf2b1464fac3f45e846cab9ba1949cdce2b81a6 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Tue, 7 Jul 2020 13:05:03 +0200 Subject: [PATCH 46/68] add checkpointing for Dynesty (#3354) * initial version * updates and fixes * cc Co-authored-by: Sumit Kumar --- pycbc/inference/io/__init__.py | 40 +++--- pycbc/inference/io/dynesty.py | 30 ++++ pycbc/inference/sampler/base.py | 6 +- pycbc/inference/sampler/cpnest.py | 4 +- pycbc/inference/sampler/dynesty.py | 208 +++++++++++++++++++++------ pycbc/inference/sampler/multinest.py | 2 +- pycbc/io/hdf.py | 3 +- pycbc/pool.py | 2 + 8 files changed, 228 insertions(+), 67 deletions(-) diff --git a/pycbc/inference/io/__init__.py b/pycbc/inference/io/__init__.py index d8982b5b505..7275c7fbaa6 100644 --- a/pycbc/inference/io/__init__.py +++ b/pycbc/inference/io/__init__.py @@ -29,6 +29,7 @@ import h5py as _h5py from pycbc.io.record import (FieldArray, _numpy_function_lib) from pycbc import waveform as _waveform +from pycbc.io.hdf import (dump_state, load_state) from pycbc.inference.option_utils import (ParseLabelArg, ParseParametersArg) from .emcee import EmceeFile @@ -175,17 +176,18 @@ def check_integrity(filename): ref_shape = fp[group.format(parameters[0])].shape if not all(fp[group.format(param)].shape == ref_shape for param in parameters): - raise IOError("not all datasets in the samples group have the " - "same shape") + raise IOError("not all datasets in the samples group have " + "the same shape") # check that we can read the first/last sample firstidx = tuple([0]*len(ref_shape)) lastidx = tuple([-1]*len(ref_shape)) - for param in parameters: - _ = fp[group.format(param)][firstidx] - _ = fp[group.format(param)][lastidx] + for param in parameters: + _ = fp[group.format(param)][firstidx] + _ = fp[group.format(param)][lastidx] -def validate_checkpoint_files(checkpoint_file, backup_file): +def validate_checkpoint_files(checkpoint_file, backup_file, + check_nsamples=True): """Checks if the given checkpoint and/or backup files are valid. 
The checkpoint file is considered valid if: @@ -222,12 +224,14 @@ def validate_checkpoint_files(checkpoint_file, backup_file): checkpoint_valid = True except (ValueError, KeyError, IOError): checkpoint_valid = False + # backup file try: check_integrity(backup_file) backup_valid = True except (ValueError, KeyError, IOError): backup_valid = False + # since we can open the file, run self diagnostics if checkpoint_valid: with loadfile(checkpoint_file, 'r') as fp: @@ -235,17 +239,19 @@ def validate_checkpoint_files(checkpoint_file, backup_file): if backup_valid: with loadfile(backup_file, 'r') as fp: backup_valid = fp.validate() - # check that the checkpoint and backup have the same number of samples; - # if not, assume the checkpoint has the correct number - if checkpoint_valid and backup_valid: - with loadfile(checkpoint_file, 'r') as fp: - group = list(fp[fp.samples_group].keys())[0] - nsamples = fp[fp.samples_group][group].size - with loadfile(backup_file, 'r') as fp: - group = list(fp[fp.samples_group].keys())[0] - backup_nsamples = fp[fp.samples_group][group].size - backup_valid = nsamples == backup_nsamples - # decide what to do based on the files' statuses + if check_nsamples: + # This check is not required by nested samplers + # check that the checkpoint and backup have the same number of samples; + # if not, assume the checkpoint has the correct number + if checkpoint_valid and backup_valid: + with loadfile(checkpoint_file, 'r') as fp: + group = list(fp[fp.samples_group].keys())[0] + nsamples = fp[fp.samples_group][group].size + with loadfile(backup_file, 'r') as fp: + group = list(fp[fp.samples_group].keys())[0] + backup_nsamples = fp[fp.samples_group][group].size + backup_valid = nsamples == backup_nsamples + # decide what to do based on the files' statuses if checkpoint_valid and not backup_valid: # copy the checkpoint to the backup logging.info("Backup invalid; copying checkpoint file") diff --git a/pycbc/inference/io/dynesty.py b/pycbc/inference/io/dynesty.py index 52902211221..c60363315b3 100644 --- a/pycbc/inference/io/dynesty.py +++ b/pycbc/inference/io/dynesty.py @@ -23,9 +23,39 @@ # """Provides IO for the dynesty sampler. """ +from pycbc.io.hdf import (dump_state, load_state) from .base_nested_sampler import BaseNestedSamplerFile class DynestyFile(BaseNestedSamplerFile): """Class to handle file IO for the ``dynesty`` sampler.""" name = 'dynesty_file' + + def write_pickled_data_into_checkpoint_file(self, state): + """Dump the sampler state into checkpoint file + """ + if 'sampler_info/saved_state' not in self: + self.create_group('sampler_info/saved_state') + dump_state(state, self, path='sampler_info/saved_state') + + def read_pickled_data_from_checkpoint_file(self): + """Load the sampler state (pickled) from checkpoint file + """ + return load_state(self, path='sampler_info/saved_state') + + def validate(self): + """Runs a validation test. + This checks that a samples group exist, and that pickeled data can + be loaded. + Returns + ------- + bool : + Whether or not the file is valid as a checkpoint file. 
+ """ + try: + if 'sampler_info/saved_state' in self: + load_state(self, path='sampler_info/saved_state') + checkpoint_valid = True + except KeyError: + checkpoint_valid = False + return checkpoint_valid diff --git a/pycbc/inference/sampler/base.py b/pycbc/inference/sampler/base.py index f1476c9f28e..7e34d3f9236 100644 --- a/pycbc/inference/sampler/base.py +++ b/pycbc/inference/sampler/base.py @@ -155,7 +155,7 @@ def resume_from_checkpoint(self): # -def setup_output(sampler, output_file): +def setup_output(sampler, output_file, check_nsamples=True): r"""Sets up the sampler's checkpoint and output files. The checkpoint file has the same name as the output file, but with @@ -175,7 +175,9 @@ def setup_output(sampler, output_file): # check if we have a good checkpoint and/or backup file logging.info("Looking for checkpoint file") checkpoint_valid = validate_checkpoint_files(checkpoint_file, - backup_file) + backup_file, + check_nsamples) + # Create a new file if the checkpoint doesn't exist, or if it is # corrupted sampler.new_checkpoint = False # keeps track if this is a new file or not diff --git a/pycbc/inference/sampler/cpnest.py b/pycbc/inference/sampler/cpnest.py index fe4a792010e..b4a93ff3416 100644 --- a/pycbc/inference/sampler/cpnest.py +++ b/pycbc/inference/sampler/cpnest.py @@ -119,7 +119,7 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, verbose=verbose, loglikelihood_function=loglikelihood_function) - setup_output(obj, output_file) + setup_output(obj, output_file, check_nsamples=False) if not obj.new_checkpoint: obj.resume_from_checkpoint() return obj @@ -139,7 +139,7 @@ def finalize(self): self.write_results(fn) logging.info("Validating checkpoint and backup files") checkpoint_valid = validate_checkpoint_files( - self.checkpoint_file, self.backup_file) + self.checkpoint_file, self.backup_file, check_nsamples=False) if not checkpoint_valid: raise IOError("error writing to checkpoint file") diff --git a/pycbc/inference/sampler/dynesty.py b/pycbc/inference/sampler/dynesty.py index 866758f407c..a7fd4972216 100644 --- a/pycbc/inference/sampler/dynesty.py +++ b/pycbc/inference/sampler/dynesty.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 Collin Capano, Sumit Kumar +# Copyright (C) 2019 Collin Capano, Sumit Kumar, Prayush Kumar # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 3 of the License, or (at your @@ -30,11 +30,15 @@ from __future__ import absolute_import import logging -from pycbc.pool import choose_pool +import copy +import os +import time import numpy import dynesty -from dynesty.utils import resample_equal -from pycbc.inference.io import (DynestyFile, validate_checkpoint_files) +from pycbc.pool import choose_pool +from dynesty import utils as dyfunc +from pycbc.inference.io import (DynestyFile, validate_checkpoint_files, + loadfile) from pycbc.distributions import read_constraints_from_config from .base import (BaseSampler, setup_output) from .base_mcmc import get_optional_arg_from_config @@ -69,38 +73,85 @@ class DynestySampler(BaseSampler): _io = DynestyFile def __init__(self, model, nlive, nprocesses=1, - loglikelihood_function=None, use_mpi=False, run_kwds=None, - **kwargs): + checkpoint_time_interval=None, maxcall=None, + loglikelihood_function=None, use_mpi=False, + run_kwds=None, **kwargs): + self.model = model log_likelihood_call, prior_call = setup_calls( model, nprocesses=nprocesses, 
loglikelihood_function=loglikelihood_function) # Set up the pool - pool = choose_pool(mpi=use_mpi, processes=nprocesses) - if pool is not None: - pool.size = nprocesses + self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) + self.maxcall = maxcall + self.checkpoint_time_interval = checkpoint_time_interval self.run_kwds = {} if run_kwds is None else run_kwds self.nlive = nlive self.names = model.sampling_params self.ndim = len(model.sampling_params) self.checkpoint_file = None + # Enable checkpointing if checkpoint_time_interval is set in config + # file in sampler section + if self.checkpoint_time_interval: + self.run_with_checkpoint = True + if self.maxcall is None: + self.maxcall = 5000 * nprocesses + logging.info("Checkpointing enabled, will verify every %s calls" + " and try to checkpoint every %s seconds", + self.maxcall, self.checkpoint_time_interval) + else: + self.run_with_checkpoint = False + if self.nlive < 0: # Interpret a negative input value for the number of live points # (which is clearly an invalid input in all senses) # as the desire to dynamically determine that number self._sampler = dynesty.DynamicNestedSampler(log_likelihood_call, prior_call, self.ndim, - pool=pool, **kwargs) + pool=self.pool, + **kwargs) + self.run_with_checkpoint = False + logging.info("Checkpointing not currently supported with" + "DYNAMIC nested sampler") else: self._sampler = dynesty.NestedSampler(log_likelihood_call, prior_call, self.ndim, nlive=self.nlive, - pool=pool, **kwargs) + pool=self.pool, **kwargs) + + # properties of the internal sampler which should not be pickled + self.no_pickle = ['loglikelihood', + 'prior_transform', + 'propose_point', + 'update_proposal', + '_UPDATE', '_PROPOSE', + 'evolve_point'] def run(self): - self._sampler.run_nested(**self.run_kwds) + diff_niter = 1 + if self.run_with_checkpoint is True: + n_checkpointing = 1 + t0 = time.time() + it = self._sampler.it + + logging.info('Starting from iteration: %s', it) + while diff_niter != 0: + self._sampler.run_nested(maxcall=self.maxcall, **self.run_kwds) + + delta_t = time.time() - t0 + diff_niter = self._sampler.it - it + logging.info("Checking if we should checkpoint: %.2f s", delta_t) + + if delta_t >= self.checkpoint_time_interval: + logging.info('Checkpointing N={}'.format(n_checkpointing)) + self.checkpoint() + n_checkpointing += 1 + t0 = time.time() + it = self._sampler.it + else: + self._sampler.run_nested(**self.run_kwds) @property def io(self): @@ -127,7 +178,6 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, # optional run_nested arguments for dynesty rargs = {'maxiter': int, - 'maxcall': int, 'dlogz': float, 'logl_max': float, 'n_effective': int, @@ -135,10 +185,13 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, # optional arguments for dynesty cargs = {'bound': str, + 'maxcall': int, 'bootstrap': int, 'enlarge': float, 'update_interval': float, - 'sample': str} + 'sample': str, + 'checkpoint_time_interval': float + } extra = {} run_extra = {} for karg in cargs: @@ -152,16 +205,84 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, obj = cls(model, nlive=nlive, nprocesses=nprocesses, loglikelihood_function=loglikelihood_function, use_mpi=use_mpi, run_kwds=run_extra, **extra) - setup_output(obj, output_file) + setup_output(obj, output_file, check_nsamples=False) + if not obj.new_checkpoint: obj.resume_from_checkpoint() return obj def checkpoint(self): - pass + """Checkpoint function for dynesty sampler + """ + with loadfile(self.checkpoint_file, 'a') 
as fp: + fp.write_random_state() + + # Dynesty has its own __getstate__ which deletes + # random state information and the pool + saved = {} + for key in self.no_pickle: + if hasattr(self._sampler, key): + saved[key] = getattr(self._sampler, key) + setattr(self._sampler, key, None) + + # For the dynamic sampler, we must also handle the internal + # sampler object + #saved_internal = {} + #if self.nlive < 0: + # for key in self.no_pickle: + # if hasattr(self._sampler.sampler, key): + # saved[key] = getattr(self._sampler.sampler, key) + # setattr(self._sampler.sampler, key, None) + + #for key in self._sampler.__dict__: + # print(key, type(self._sampler.__dict__[key])) + + #for key in self._sampler.sampler.__dict__: + # print(key, type(self._sampler.sampler.__dict__[key])) + + fp.write_pickled_data_into_checkpoint_file(self._sampler) + + # Restore properties that couldn't be pickled if we are continuing + for key in saved: + setattr(self._sampler, key, saved[key]) + + # Restore for dynamic nested sampler's internal sampler + #for key in saved_internal: + # setattr(self._sampler.sampler, key, saved_internal[key]) def resume_from_checkpoint(self): - pass + try: + with loadfile(self.checkpoint_file, 'r') as fp: + sampler = fp.read_pickled_data_from_checkpoint_file() + + for key in sampler.__dict__: + if key not in self.no_pickle: + value = getattr(sampler, key) + setattr(self._sampler, key, value) + + # If dynamic sampling, also restore internal sampler + #if self.nlive < 0: + # for key in sampler.__dict__: + # if key not in self.no_pickle: + # value = getattr(sampler.sampler, key) + # setattr(self._sampler.sampler, key, value) + + self.set_state_from_file(self.checkpoint_file) + logging.info("Found valid checkpoint file: %s", + self.checkpoint_file) + except Exception as e: + print(e) + logging.info("Failed to load checkpoint file") + + def set_state_from_file(self, filename): + """Sets the state of the sampler back to the instance saved in a file. 
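A minimal usage sketch of the checkpointing added in this patch, assuming an
existing ``model`` instance; ``run.hdf`` is just an example output path, and
the keyword names mirror the new constructor arguments::

    from pycbc.inference.sampler.dynesty import DynestySampler
    from pycbc.inference.sampler.base import setup_output

    # run dynesty in chunks of at most `maxcall` likelihood calls and try to
    # checkpoint whenever `checkpoint_time_interval` seconds have elapsed
    sampler = DynestySampler(model, nlive=2000,
                             checkpoint_time_interval=1800,
                             maxcall=10000)
    # nested samplers skip the MCMC-style sample-count check
    setup_output(sampler, 'run.hdf', check_nsamples=False)
    sampler.run()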
+ """ + with self.io(filename, 'r') as fp: + numpy.random.set_state(fp.read_random_state()) + + self._sampler.rstate = numpy.random + #if self.nlive < 0: + # self._sampler.sampler.rstate = numpy.random def finalize(self): logz = self._sampler.results.logz[-1:][0] @@ -175,20 +296,33 @@ def finalize(self): self.write_results(fn) logging.info("Validating checkpoint and backup files") checkpoint_valid = validate_checkpoint_files( - self.checkpoint_file, self.backup_file) + self.checkpoint_file, self.backup_file, check_nsamples=False) if not checkpoint_valid: raise IOError("error writing to checkpoint file") - @property - def model_stats(self): - logl = self._sampler.results.logl - return {'loglikelihood': logl} - @property def samples(self): - samples_dict = {p: self.posterior_samples[:, i] for p, i in - zip(self.model.variable_params, range(self.ndim))} - return samples_dict + results = self._sampler.results + samples = results.samples + weights = numpy.exp(results.logwt - results.logz[-1]) + N = len(weights) + positions = (numpy.random.random() + numpy.arange(N)) / N + + idx = numpy.zeros(N, dtype=numpy.int) + cumulative_sum = numpy.cumsum(weights) + i, j = 0, 0 + while i < N: + if positions[i] < cumulative_sum[j]: + idx[i] = j + i += 1 + else: + j += 1 + + numpy.random.shuffle(idx) + post = {'loglikelihood': self._sampler.results.logl[idx]} + for i, param in enumerate(self.variable_params): + post[param] = samples[:, i][idx] + return post def set_initial_conditions(self, initial_distribution=None, samples_file=None): @@ -210,27 +344,15 @@ def write_results(self, filename): """ with self.io(filename, 'a') as fp: # write samples - fp.write_samples(self.samples, self.model.variable_params) - # write stats - fp.write_samples(self.model_stats) + fp.write_samples(self.samples) + # write log evidence fp.write_logevidence(self._sampler.results.logz[-1:][0], self._sampler.results.logzerr[-1:][0]) @property - def posterior_samples(self): - """ - Returns posterior samples from nested samples and weights - given by dynsety sampler - """ - - dynesty_samples = self._sampler.results['samples'] - wt = numpy.exp(self._sampler.results['logwt'] - - self._sampler.results['logz'][-1]) - # Make sure that sum of weights equal to 1 - weights = wt/numpy.sum(wt) - posterior_dynesty = resample_equal(dynesty_samples, weights) - return posterior_dynesty + def model_stats(self): + pass @property def logz(self): @@ -238,7 +360,6 @@ def logz(self): return bayesian evidence estimated by dynesty sampler """ - return self._sampler.results.logz[-1:][0] @property @@ -247,5 +368,4 @@ def logz_err(self): return error in bayesian evidence estimated by dynesty sampler """ - return self._sampler.results.logzerr[-1:][0] diff --git a/pycbc/inference/sampler/multinest.py b/pycbc/inference/sampler/multinest.py index 6acd9bb4559..0654745c7a6 100644 --- a/pycbc/inference/sampler/multinest.py +++ b/pycbc/inference/sampler/multinest.py @@ -327,7 +327,7 @@ def checkpoint(self): f_p.write_niterations(self.niterations) logging.info("Validating checkpoint and backup files") checkpoint_valid = validate_checkpoint_files( - self.checkpoint_file, self.backup_file) + self.checkpoint_file, self.backup_file, check_nsamples=False) if not checkpoint_valid: raise IOError("error writing to checkpoint file") diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py index 370096eaf5d..83ef16d9965 100644 --- a/pycbc/io/hdf.py +++ b/pycbc/io/hdf.py @@ -1206,7 +1206,8 @@ def get_all_subkeys(grp, key): # -def dump_state(state, fp, path=None, dsetname='state', 
protocol=None): +def dump_state(state, fp, path=None, dsetname='state', + protocol=pickle.HIGHEST_PROTOCOL): """Dumps the given state to an hdf5 file handler. The state is stored as a raw binary array to ``{path}/{dsetname}`` in the diff --git a/pycbc/pool.py b/pycbc/pool.py index dce6b87e9f8..5f4d76478bc 100644 --- a/pycbc/pool.py +++ b/pycbc/pool.py @@ -136,6 +136,8 @@ def choose_pool(processes, mpi=False): pool = SinglePool() else: pool = BroadcastPool(processes) + + pool.size = processes return pool From a5950f07c9e8f27f05b26e8af2c0c1534eb684f3 Mon Sep 17 00:00:00 2001 From: Bhooshan Uday Varsha Gadre Date: Wed, 8 Jul 2020 15:49:33 +0200 Subject: [PATCH 47/68] Write n-loudest in the background (#3351) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * To store only n-loudest while writing live background. This can reduc… e storage requirement * Actually get N after partitioning * Fixing parsing arg name Co-authored-by: Bhooshan Gadre --- bin/pycbc_live | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/bin/pycbc_live b/bin/pycbc_live index 4e9b70c63a7..e54449b4c95 100755 --- a/bin/pycbc_live +++ b/bin/pycbc_live @@ -562,6 +562,9 @@ parser.add_argument('--store-psd', action='store_true') parser.add_argument('--output-background', type=str, nargs='+', help='Takes a period in seconds and a file path and dumps ' 'the coinc backgrounds to that path with that period') +parser.add_argument('--output-background-n-loudest', type=int, default=0, + help="If given an integer (assumed positive), it stores loudest n triggers" + "(not sorted) for each of the coinc background") parser.add_argument('--newsnr-threshold', type=float, default=0) parser.add_argument('--max-batch-size', type=int, default=2**27) @@ -872,8 +875,14 @@ with ctx: bg_fn = os.path.join(args.output_background[1], bg_fn) with h5py.File(bg_fn, 'w') as bgf: for bg_ifos, bg_data, bg_time in bg_dists: - ds = bgf.create_dataset(','.join(sorted(bg_ifos)), - data=bg_data, compression='gzip') + if args.output_background_n_loudest: + n_loudest = args.output_background_n_loudest + assert (n_loudest > 0), "We can only store positive int loudest triggers." + ds = bgf.create_dataset(','.join(sorted(bg_ifos)), + data=-numpy.partition(-bg_data, n_loudest)[:n_loudest], compression='gzip') + else: + ds = bgf.create_dataset(','.join(sorted(bg_ifos)), + data=bg_data, compression='gzip') ds.attrs['background_time'] = bg_time bgf.attrs['gps_time'] = last_bg_dump_time From 6fbf37ff193580ab0fc5fbfc76ff8a3ca7bb013e Mon Sep 17 00:00:00 2001 From: Connor McIsaac <33734557+connor-mcisaac@users.noreply.github.com> Date: Thu, 9 Jul 2020 09:13:34 +0100 Subject: [PATCH 48/68] Fix pruning for single templates (#3357) * fix pruning for single template * fix typo * test for size zero bin * Revert "fix typo" This reverts commit 12d177c18ee68efc0e14b4a72cc1a8801aa57d09. * Revert "fix pruning for single template" This reverts commit 3eb2274e88c908934d7e0db215943910e297ecd5. 
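The guard added below avoids a division by zero when the fitted parameter
range collapses to a single point (a single-template bin, ``minpar == maxpar``).
A standalone sketch of the patched logic, using a hypothetical helper name and
scalar inputs::

    import numpy

    def bin_fraction(par, minpar, maxpar, log=False):
        """Fraction of the way `par` lies between `minpar` and `maxpar`."""
        if log:
            par, minpar, maxpar = (numpy.log(par), numpy.log(minpar),
                                   numpy.log(maxpar))
        if minpar != maxpar:
            return float(par - minpar) / float(maxpar - minpar)
        # degenerate range: everything falls in the single zero-width bin
        return 0.0

    # previously this raised ZeroDivisionError for a single-template fit
    print(bin_fraction(30., 30., 30.))   # -> 0.0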
--- pycbc/events/trigger_fits.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pycbc/events/trigger_fits.py b/pycbc/events/trigger_fits.py index e867a2e72c1..a20819119f5 100644 --- a/pycbc/events/trigger_fits.py +++ b/pycbc/events/trigger_fits.py @@ -241,7 +241,12 @@ def which_bin(par, minpar, maxpar, nbins, log=False): if log: par, minpar, maxpar = numpy.log(par), numpy.log(minpar), numpy.log(maxpar) # par lies some fraction of the way between min and max - frac = float(par - minpar) / float(maxpar - minpar) + if minpar != maxpar: + frac = float(par - minpar) / float(maxpar - minpar) + else: + # if they are equal there is only one size 0 bin + # must be in that bin + frac = 0 # binind then lies between 0 and nbins - 1 binind = int(frac * nbins) # corner case From 0dd697e012917d6ed13755601b81671775420efd Mon Sep 17 00:00:00 2001 From: Gareth S Davies Date: Thu, 9 Jul 2020 12:04:12 +0200 Subject: [PATCH 49/68] Offline hash handling (#3314) * Change hashing function in offline bank creation to be safe when used with python3 * some other python3 compatibility fixes --- bin/hdfcoinc/pycbc_multiifo_coinc_statmap_inj | 4 +-- bin/plotting/pycbc_ifar_catalog | 4 ++- .../pycbc_create_offline_search_workflow | 2 +- pycbc/events/coinc.py | 2 +- pycbc/waveform/bank.py | 27 ++++++++++++++++++- 5 files changed, 33 insertions(+), 6 deletions(-) diff --git a/bin/hdfcoinc/pycbc_multiifo_coinc_statmap_inj b/bin/hdfcoinc/pycbc_multiifo_coinc_statmap_inj index 2862a4b5750..ea2c0f9f119 100644 --- a/bin/hdfcoinc/pycbc_multiifo_coinc_statmap_inj +++ b/bin/hdfcoinc/pycbc_multiifo_coinc_statmap_inj @@ -101,8 +101,8 @@ if len(zdata) > 0: f['foreground/fap_exc'] = fap_exc logging.info('calculating injection backgrounds') - ifotimes = zip(zdata.data['%s/time' % ifo] for ifo in args.ifos) - ftimes = numpy.concatenate(ifotimes).mean(axis=0) + ifotimes = numpy.array([zdata.data['%s/time' % ifo] for ifo in args.ifos]) + ftimes = ifotimes.mean(axis=0) start, end = ftimes - args.veto_window, ftimes + args.veto_window fnlouder = numpy.zeros(len(ftimes), dtype=numpy.float32) diff --git a/bin/plotting/pycbc_ifar_catalog b/bin/plotting/pycbc_ifar_catalog index a5138fa88dd..5f2901b8c9f 100644 --- a/bin/plotting/pycbc_ifar_catalog +++ b/bin/plotting/pycbc_ifar_catalog @@ -131,7 +131,9 @@ if h_inc_back_num >= 0 and h_iterations is not None and h_iterations != 0: h_rm_ifar = hrm_sorted[idx_start:] h_rm_cumnum = numpy.arange(len(h_rm_ifar), 0, -1) else: - fore_ifar = f['foreground/' + ifar_str][:] + fore_ifar = numpy.array([]) + for f in trigf: + fore_ifar = numpy.append(fore_ifar, f['foreground/' + ifar_str][:]) if opts.remove_threshold is not None and opts.truncate_threshold is not None: raise RuntimeError("Can't both remove and truncate foreground events!") diff --git a/bin/workflows/pycbc_create_offline_search_workflow b/bin/workflows/pycbc_create_offline_search_workflow index 8abe64764eb..72fff327043 100644 --- a/bin/workflows/pycbc_create_offline_search_workflow +++ b/bin/workflows/pycbc_create_offline_search_workflow @@ -224,7 +224,7 @@ else: # Create coinc tag coinctag = '{}det'.format(len(ifocomb)) ctagcomb = ['full_data', coinctag] - other_ifo_keys = no_fg_exc_files.keys() + other_ifo_keys = list(no_fg_exc_files.keys()) other_ifo_keys.remove(ordered_ifo_list) other_bg_files = {ctype: no_fg_exc_files[ctype] for ctype in other_ifo_keys} diff --git a/pycbc/events/coinc.py b/pycbc/events/coinc.py index 122cf7122c9..ac9d31ccea4 100644 --- a/pycbc/events/coinc.py +++ b/pycbc/events/coinc.py @@ -428,7 
+428,7 @@ def cluster_coincs_multiifo(stat, time_coincs, timeslide_id, slide, window, argm cindex: numpy.ndarray The set of indices corresponding to the surviving coincidences """ - time_coinc_zip = zip(*time_coincs) + time_coinc_zip = list(zip(*time_coincs)) if len(time_coinc_zip) == 0: logging.info('No coincident triggers.') return numpy.array([]) diff --git a/pycbc/waveform/bank.py b/pycbc/waveform/bank.py index 56e1a0eec6b..6bf7f30a808 100644 --- a/pycbc/waveform/bank.py +++ b/pycbc/waveform/bank.py @@ -38,6 +38,8 @@ from pycbc import DYN_RANGE_FAC from pycbc.types import FrequencySeries, zeros import pycbc.io +import six +import hashlib def sigma_cached(self, psd): """ Cache sigma calculate for use in tandem with the FilterBank class @@ -178,6 +180,26 @@ def parse_approximant_arg(approximant_arg, warray): """ return warray.parse_boolargs(boolargs_from_apprxstr(approximant_arg))[0] +def tuple_to_hash(tuple_to_be_hashed): + """ + Return a hash for a numpy array, avoids native (unsafe) python3 hash function + + Parameters + ---------- + tuple_to_be_hashed: tuple + The tuple which is being hashed + Must be convertible to a numpy array + + Returns + ------- + int + an integer representation of the hashed array + """ + if six.PY2: + return hash(tuple_to_be_hashed) + h = hashlib.blake2b(np.array(tuple_to_be_hashed).tobytes('C'), + digest_size=8) + return np.fromstring(h.digest(), dtype=int)[0] class TemplateBank(object): """Class to provide some basic helper functions and information @@ -335,8 +357,11 @@ def ensure_hash(self): 'spin2x', 'spin2y', 'spin2z',] fields = [f for f in hash_fields if f in fields] - template_hash = np.array([hash(v) for v in zip(*[self.table[p] + template_hash = np.array([tuple_to_hash(v) for v in zip(*[self.table[p] for p in fields])]) + if not np.unique(template_hash).size == template_hash.size: + raise RuntimeError("Some template hashes clash. This should not " + "happen.") self.table = self.table.add_fields(template_hash, 'template_hash') def write_to_hdf(self, filename, start_index=None, stop_index=None, From fc8cbf49889e527ff8bd3a1cc6e6900d62e190b8 Mon Sep 17 00:00:00 2001 From: Ian Harry Date: Thu, 9 Jul 2020 18:51:06 +0100 Subject: [PATCH 50/68] Fix some issues with #3344 (#3359) * Pass kwargs straight through * Fix to mfup issue * Need two time-dependent options --- pycbc/workflow/core.py | 4 ++-- pycbc/workflow/minifollowups.py | 6 +++--- pycbc/workflow/plotting.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pycbc/workflow/core.py b/pycbc/workflow/core.py index 4e73725bdee..55200a44762 100644 --- a/pycbc/workflow/core.py +++ b/pycbc/workflow/core.py @@ -455,12 +455,12 @@ def has_opt(self, opt): return False - def create_node(self): + def create_node(self, **kwargs): """Default node constructor. This is usually overridden by subclasses of Executable. """ - return Node(self) + return Node(self, **kwargs) def update_current_retention_level(self, value): """Set a new value for the current retention level. diff --git a/pycbc/workflow/minifollowups.py b/pycbc/workflow/minifollowups.py index 9e938a65932..a915b13ea7d 100644 --- a/pycbc/workflow/minifollowups.py +++ b/pycbc/workflow/minifollowups.py @@ -345,7 +345,7 @@ class SingleTemplateExecutable(PlotExecutable): PlotExecutable but adds the file_input_options. 
""" file_input_options = ['--gating-file'] - time_dependent_options = ['--channel-name'] + time_dependent_options = ['--channel-name', '--frame-type'] class SingleTimeFreqExecutable(PlotExecutable): @@ -354,7 +354,7 @@ class SingleTimeFreqExecutable(PlotExecutable): PlotExecutable but adds the file_input_options. """ file_input_options = ['--gating-file'] - time_dependent_options = ['--channel-name'] + time_dependent_options = ['--channel-name', '--frame-type'] class PlotQScanExecutable(PlotExecutable): @@ -363,7 +363,7 @@ class PlotQScanExecutable(PlotExecutable): PlotExecutable but adds the file_input_options. """ file_input_options = ['--gating-file'] - time_dependent_options = ['--channel-name'] + time_dependent_options = ['--channel-name', '--frame-type'] def make_single_template_plots(workflow, segs, data_read_name, analyzed_name, diff --git a/pycbc/workflow/plotting.py b/pycbc/workflow/plotting.py index f4e73bef0f7..453885da4a8 100644 --- a/pycbc/workflow/plotting.py +++ b/pycbc/workflow/plotting.py @@ -50,8 +50,8 @@ class PlotExecutable(Executable): # plots and final results should get the highest priority # on the job queue - def create_node(self): - node = Executable.create_node(self) + def create_node(self, **kwargs): + node = Executable.create_node(self, **kwargs) node.set_priority(1000) return node From 282bb0a7d4d0edf100f2655cdca0fc14a00a1b20 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Fri, 10 Jul 2020 09:56:56 +0200 Subject: [PATCH 51/68] test (#3366) --- tools/pycbc_test_suite.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/pycbc_test_suite.sh b/tools/pycbc_test_suite.sh index dfe332e259f..42034978f75 100755 --- a/tools/pycbc_test_suite.sh +++ b/tools/pycbc_test_suite.sh @@ -159,12 +159,10 @@ popd echo -e "\\n>> [`date`] Building documentation" -python setup.py build_gh_pages &> $LOG_FILE +python setup.py build_gh_pages & if test $? -ne 0 ; then echo -e " FAILED!" echo -e "---------------------------------------------------------" - cat $LOG_FILE - echo -e "---------------------------------------------------------" RESULT=1 fi From e5da828306e3af038f5fee4e9a74b12a9f7cd128 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Fri, 10 Jul 2020 11:01:17 +0200 Subject: [PATCH 52/68] use radians consistently for setting angular bounds (#3355) * We should be consistent about units * update docs * fix cyclic bound for uniform angle * update unittest * ws --- pycbc/distributions/angular.py | 28 ++++++++++++---------------- test/test_distributions.py | 6 +++--- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/pycbc/distributions/angular.py b/pycbc/distributions/angular.py index 0ca04fe8c0b..b321368dec4 100644 --- a/pycbc/distributions/angular.py +++ b/pycbc/distributions/angular.py @@ -32,8 +32,7 @@ class UniformAngle(uniform.Uniform): `cyclic_domain` parameter. Bounds may be provided to limit the range for which the pdf has support. - If provided, the parameter bounds are initialized as multiples of pi, - while the stored bounds are in radians. + If provided, the parameter bounds are in radians. Parameters ---------- @@ -45,7 +44,7 @@ class UniformAngle(uniform.Uniform): The keyword arguments should provide the names of parameters and (optionally) their corresponding bounds, as either `boundaries.Bounds` instances or tuples. The bounds must be - in [0,2). These are converted to radians for storage. None may also + in [0,2PI). These are converted to radians for storage. None may also be passed; in that case, the domain bounds will be used. 
Attributes @@ -79,18 +78,17 @@ def __init__(self, cyclic_domain=False, **params): bnds = self._domain elif isinstance(bnds, boundaries.Bounds): # convert to radians - bnds._min = bnds._min.__class__(bnds._min * numpy.pi) - bnds._max = bnds._max.__class__(bnds._max * numpy.pi) + bnds._min = bnds._min.__class__(bnds._min) + bnds._max = bnds._max.__class__(bnds._max) else: # create a Bounds instance from the given tuple - bnds = boundaries.Bounds( - bnds[0]*numpy.pi, bnds[1]*numpy.pi) + bnds = boundaries.Bounds(bnds[0], bnds[1]) # check that the bounds are in the domain if bnds.min < self._domain.min or bnds.max > self._domain.max: raise ValueError("bounds must be in [{x},{y}); " - "got [{a},{b})".format(x=self._domain.min/numpy.pi, - y=self._domain.max/numpy.pi, a=bnds.min/numpy.pi, - b=bnds.max/numpy.pi)) + "got [{a},{b})".format(x=self._domain.min, + y=self._domain.max, a=bnds.min, + b=bnds.max)) # update params[p] = bnds super(UniformAngle, self).__init__(**params) @@ -118,7 +116,7 @@ def apply_boundary_conditions(self, **kwargs): """ # map values to be within the domain kwargs = dict([[p, self._domain.apply_conditions(val)] - for p,val in kwargs.items()]) + for p,val in kwargs.items() if p in self._bounds]) # now apply additional conditions return super(UniformAngle, self).apply_boundary_conditions(**kwargs) @@ -187,8 +185,7 @@ class SinAngle(UniformAngle): The domain of this distribution is `[0, pi]`. This is accomplished by putting hard boundaries at `[0, pi]`. Bounds may be provided to further limit the range for which the pdf has support. As with `UniformAngle`, - these are initizliaed as multiples of pi, while the stored bounds are in - radians. + these are initialized in radians. Parameters ---------- @@ -196,7 +193,7 @@ class SinAngle(UniformAngle): The keyword arguments should provide the names of parameters and (optionally) their corresponding bounds, as either `boundaries.Bounds` instances or tuples. The bounds must be - in [0,1]. These are converted to radians for storage. None may also + in [0,PI]. These are converted to radians for storage. None may also be passed; in that case, the domain bounds will be used. Attributes @@ -300,8 +297,7 @@ class CosAngle(SinAngle): The keyword arguments should provide the names of parameters and (optionally) their corresponding bounds, as either `boundaries.Bounds` instances or tuples. The bounds must be - in [-0.5, 0.5]. These are converted to radians for storage. - None may also be passed; in that case, the domain bounds will be used. + in [-PI/2, PI/2]. 
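With this change all angular bounds are given directly in radians rather than
as multiples of pi. A short illustrative sketch mirroring the updated unit
test further down (assuming the usual ``pycbc.distributions`` import path)::

    import numpy
    from pycbc import distributions

    # bounds are now specified in radians
    sin_dist = distributions.SinAngle(theta=(0, numpy.pi))
    cos_dist = distributions.CosAngle(theta=(-numpy.pi/2., numpy.pi/2.))
    ang_dist = distributions.UniformAngle(theta=(0, 2 * numpy.pi))

    print(ang_dist.rvs(size=4))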
Attributes ---------------- diff --git a/test/test_distributions.py b/test/test_distributions.py index bd93efdc27f..8e290e2ded2 100644 --- a/test/test_distributions.py +++ b/test/test_distributions.py @@ -176,9 +176,9 @@ def test_solid_angle(self): n_samples = int(1e6) # create generic angular distributions for test - sin_dist = distributions.SinAngle(theta=(0, 1)) - cos_dist = distributions.CosAngle(theta=(-0.5, 0.5)) - ang_dist = distributions.UniformAngle(theta=(0, 2)) + sin_dist = distributions.SinAngle(theta=(0, numpy.pi)) + cos_dist = distributions.CosAngle(theta=(-numpy.pi/2.0, numpy.pi/2.0)) + ang_dist = distributions.UniformAngle(theta=(0, numpy.pi*2.0)) # step size for PDF calculation step = 0.1 From e8694764a3d81f5be287061f505f9887cf2b4181 Mon Sep 17 00:00:00 2001 From: Ian Harry Date: Fri, 10 Jul 2020 11:35:58 +0100 Subject: [PATCH 53/68] Add nonfsio site (#3360) --- .../pegasus_files/nonfsio-site-template.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 pycbc/workflow/pegasus_files/nonfsio-site-template.xml diff --git a/pycbc/workflow/pegasus_files/nonfsio-site-template.xml b/pycbc/workflow/pegasus_files/nonfsio-site-template.xml new file mode 100644 index 00000000000..a57035a512b --- /dev/null +++ b/pycbc/workflow/pegasus_files/nonfsio-site-template.xml @@ -0,0 +1,14 @@ + + + + + + + + + + condor + $ACCOUNTING_GROUP + YES + ON_EXIT_OR_EVICT From 5685400ea258260a78b8ddf6ff49e0c60aab0d22 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Fri, 10 Jul 2020 13:05:36 +0200 Subject: [PATCH 54/68] pass boundary types to dynesty (#3364) * hook up boundary types to dynesty * nc * ws * ws --- pycbc/boundaries.py | 4 ++++ pycbc/distributions/joint.py | 32 ++++++++++++++++++++++++++++++ pycbc/inference/sampler/dynesty.py | 27 +++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/pycbc/boundaries.py b/pycbc/boundaries.py index cef305dfa42..4199308c864 100644 --- a/pycbc/boundaries.py +++ b/pycbc/boundaries.py @@ -311,12 +311,16 @@ def __init__(self, min_bound=-numpy.inf, max_bound=numpy.inf, # can be used with arrays if self._min.name == 'reflected' and self._max.name == 'reflected': self._reflect = numpy.vectorize(self._reflect_well) + self.reflected = 'well' elif self._min.name == 'reflected': self._reflect = numpy.vectorize(self._min.reflect_right) + self.reflected = 'min' elif self._max.name == 'reflected': self._reflect = numpy.vectorize(self._max.reflect_left) + self.reflected = 'max' else: self._reflect = _pass + self.reflected = False def __repr__(self): return str(self.__class__)[:-1] + " " + " ".join( diff --git a/pycbc/distributions/joint.py b/pycbc/distributions/joint.py index d99e4ef4f85..f0c7b7eb2b8 100644 --- a/pycbc/distributions/joint.py +++ b/pycbc/distributions/joint.py @@ -195,6 +195,38 @@ def rvs(self, size=1): return out + @property + def well_reflected(self): + """ Get list of which parameters are well reflected + """ + reflect = [] + bounds = self.bounds + for param in bounds: + if bounds[param].reflected == 'well': + reflect.append(param) + return reflect + + @property + def cyclic(self): + """ Get list of which parameters are cyclic + """ + cyclic = [] + bounds = self.bounds + for param in bounds: + if bounds[param].cyclic: + cyclic.append(param) + return cyclic + + @property + def bounds(self): + """ Get the dict of boundaries + """ + bnds = {} + for dist in self.distributions: + if hasattr(dist, 'bounds'): + bnds.update(dist.bounds) + return bnds + def cdfinv(self, **original): """ Apply the inverse cdf to the array of 
values [0, 1]. Every variable parameter must be given as a keyword argument. diff --git a/pycbc/inference/sampler/dynesty.py b/pycbc/inference/sampler/dynesty.py index a7fd4972216..9857ff239b4 100644 --- a/pycbc/inference/sampler/dynesty.py +++ b/pycbc/inference/sampler/dynesty.py @@ -104,6 +104,29 @@ def __init__(self, model, nlive, nprocesses=1, else: self.run_with_checkpoint = False + # Check for cyclic boundaries + periodic = [] + cyclic = self.model.prior_distribution.cyclic + for i, param in enumerate(self.variable_params): + if param in cyclic: + logging.info('Param: %s will be cyclic', param) + periodic.append(i) + + if len(periodic) == 0: + periodic = None + + # Check for reflected boundaries. Dynesty only supports + # reflection on both min and max of boundary. + reflective = [] + reflect = self.model.prior_distribution.well_reflected + for i, param in enumerate(self.variable_params): + if param in reflect: + logging.info("Param: %s will be well reflected", param) + reflective.append(i) + + if len(reflective) == 0: + reflective = None + if self.nlive < 0: # Interpret a negative input value for the number of live points # (which is clearly an invalid input in all senses) @@ -111,6 +134,8 @@ def __init__(self, model, nlive, nprocesses=1, self._sampler = dynesty.DynamicNestedSampler(log_likelihood_call, prior_call, self.ndim, pool=self.pool, + reflective=reflective, + periodic=periodic, **kwargs) self.run_with_checkpoint = False logging.info("Checkpointing not currently supported with" @@ -119,6 +144,8 @@ def __init__(self, model, nlive, nprocesses=1, self._sampler = dynesty.NestedSampler(log_likelihood_call, prior_call, self.ndim, nlive=self.nlive, + reflective=reflective, + periodic=periodic, pool=self.pool, **kwargs) # properties of the internal sampler which should not be pickled From 121ebbb11249f37f745d9b2fb954ac71800f2f2a Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Fri, 10 Jul 2020 14:46:28 +0200 Subject: [PATCH 55/68] Write checkpoint history (#3347) * add ability to append data in base_hdf write_data * add write start time method to base sampler io * add update_checkpoint_history function to base sampler file * add update sampler history to base mcmc file * write run start time when starting up run * call update checkpoint history in base mcmc's checkpoint * fix various bugs * add run end time attribute * handle no act or burn in having been calculated --- bin/inference/pycbc_inference | 4 ++ pycbc/inference/io/base_hdf.py | 41 ++++++++++++-- pycbc/inference/io/base_mcmc.py | 31 +++++++++++ pycbc/inference/io/base_sampler.py | 82 ++++++++++++++++++++++++++++ pycbc/inference/sampler/base.py | 1 + pycbc/inference/sampler/base_mcmc.py | 4 ++ 6 files changed, 157 insertions(+), 6 deletions(-) diff --git a/bin/inference/pycbc_inference b/bin/inference/pycbc_inference index b46b9cd410b..0d653d4a01f 100644 --- a/bin/inference/pycbc_inference +++ b/bin/inference/pycbc_inference @@ -158,6 +158,10 @@ if not opts.save_backup: logging.info("Deleting backup file") os.remove(sampler.backup_file) +# write the end time +with sampler.io(opts.output_file, 'a') as fp: + fp.write_run_end_time() + if condor_ckpt: # create an empty checkpoint file open(sampler.checkpoint_file, 'a').close() diff --git a/pycbc/inference/io/base_hdf.py b/pycbc/inference/io/base_hdf.py index c7b4f38795f..88e7733ce8a 100644 --- a/pycbc/inference/io/base_hdf.py +++ b/pycbc/inference/io/base_hdf.py @@ -760,14 +760,12 @@ def write_kwargs_to_attrs(cls, attrs, **kwargs): else: attrs[arg] = val - def 
write_data(self, name, data, path=None): + def write_data(self, name, data, path=None, append=False): """Convenience function to write data. Given ``data`` is written as a dataset with ``name`` in ``path``. - If the data hasn't been written yet, the dataset will be created. - Otherwise, will overwrite the data that is there. If data already - exists in the file with the same name and path, the given data must - have the same shape. + If the dataset or path do not exist yet, the dataset and path will + be created. Parameters ---------- @@ -783,6 +781,14 @@ def write_data(self, name, data, path=None): Write to the given path. Default (None) will write to the top level. If the path does not exist in the file, it will be created. + append : bool, optional + Append the data to what is currently in the file if ``path/name`` + already exists in the file, and if it does not, create the dataset + so that its last dimension can be resized. The data can only + be appended along the last dimension, and if it already exists in + the data, it must be resizable along this dimension. If ``False`` + (the default) what is in the file will be overwritten, and the + given data must have the same shape. """ if path is None: path = '/' @@ -795,7 +801,30 @@ def write_data(self, name, data, path=None): if isinstance(data, dict): # call myself for each key, value pair in the dictionary for key, val in data.items(): - self.write_data(key, val, path='/'.join([path, name])) + self.write_data(key, val, path='/'.join([path, name]), + append=append) + # if appending, we need to resize the data on disk, or, if it doesn't + # exist yet, create a dataset that is resizable along the last + # dimension + elif append: + # cast the data to an array if it isn't already one + if isinstance(data, (list, tuple)): + data = numpy.array(data) + if not isinstance(data, numpy.ndarray): + data = numpy.array([data]) + dshape = data.shape + ndata = dshape[-1] + try: + startidx = group[name].shape[-1] + group[name].resize(dshape[-1]+group[name].shape[-1], + axis=len(group[name].shape)-1) + except KeyError: + # dataset doesn't exist yet + group.create_dataset(name, dshape, + maxshape=tuple(list(dshape)[:-1]+[None]), + dtype=data.dtype, fletcher32=True) + startidx = 0 + group[name][..., startidx:startidx+ndata] = data[..., :] else: try: group[name][()] = data diff --git a/pycbc/inference/io/base_mcmc.py b/pycbc/inference/io/base_mcmc.py index d55415eeaf7..2b16cd92d0b 100644 --- a/pycbc/inference/io/base_mcmc.py +++ b/pycbc/inference/io/base_mcmc.py @@ -339,6 +339,37 @@ def raw_acls(self, acls): """ self.raw_acts = {p: acls[p] * self.thinned_by for p in acls} + def _update_sampler_history(self): + """Writes the number of iterations, effective number of samples, + autocorrelation times, and burn-in iteration to the history. 
+ """ + path = '/'.join([self.sampler_group, 'checkpoint_history']) + # write the current number of iterations + self.write_data('niterations', self.niterations, path=path, + append=True) + self.write_data('effective_nsamples', self.effective_nsamples, + path=path, append=True) + # write the act: we'll make sure that this is 2D, so that the acts + # can be appened along the last dimension + try: + act = self.act + except ValueError: + # no acts were calculate + act = None + if act is not None: + act = act.reshape(tuple(list(act.shape)+[1])) + self.write_data('act', act, path=path, append=True) + # write the burn in iteration in the same way + try: + burn_in = self.burn_in_iteration + except ValueError: + # no burn in tests were done + burn_in = None + if burn_in is not None: + burn_in = burn_in.reshape(tuple(list(burn_in.shape)+[1])) + self.write_data('burn_in_iteration', burn_in, path=path, + append=True) + @staticmethod def extra_args_parser(parser=None, skip_args=None, **kwargs): """Create a parser to parse sampler-specific arguments for loading diff --git a/pycbc/inference/io/base_sampler.py b/pycbc/inference/io/base_sampler.py index 42e9d2c3c76..a00171437d8 100644 --- a/pycbc/inference/io/base_sampler.py +++ b/pycbc/inference/io/base_sampler.py @@ -17,6 +17,7 @@ from __future__ import absolute_import +import time from abc import (ABCMeta, abstractmethod) from six import add_metaclass @@ -31,6 +32,41 @@ class BaseSamplerFile(BaseInferenceFile): This adds abstract methods ``write_resume_point`` and ``write_sampler_metadata`` to :py:class:`BaseInferenceFile`. """ + def write_run_start_time(self): + """Writes the current (UNIX) time to the file. + + Times are stored as a list in the file's ``attrs``, with name + ``run_start_time``. If the attrbute already exists, the current time + is appended. Otherwise, the attribute will be created and time added. + """ + attrname = "run_start_time" + try: + times = self.attrs[attrname].tolist() + except KeyError: + times = [] + times.append(time.time()) + self.attrs[attrname] = times + + @property + def run_start_time(self): + """The (UNIX) time pycbc inference began running. + + If the run resumed from a checkpoint, the time the last checkpoint + started is reported. + """ + return self.attrs['run_start_time'][-1] + + def write_run_end_time(self): + """"Writes the curent (UNIX) time as the ``run_end_time`` attribute. + """ + self.attrs["run_end_time"] = time.time() + + @property + def run_end_time(self): + """The (UNIX) time pycbc inference finished. + """ + return self.attrs["run_end_time"] + @abstractmethod def write_resume_point(self): """Should write the point that a sampler starts up. @@ -49,6 +85,52 @@ def write_sampler_metadata(self, sampler): """ pass + def update_checkpoint_history(self): + """Writes a copy of relevant metadata to the file's checkpoint history. + + All data are written to ``sampler_info/checkpoint_history``. If the + group does not exist yet, it will be created. + + This function writes the current time and the time since the last + checkpoint to the file. It will also call + :py:func:`_update_sampler_history` to write sampler-specific history. 
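The history writing relies on the new ``append`` behaviour of ``write_data``
shown earlier in this patch. A hedged sketch of that behaviour, assuming
``fp`` is an open inference HDF file and that ``sampler_group`` is the usual
``sampler_info`` group::

    # each call appends along the last dimension, creating a resizable
    # dataset on the first write
    path = 'sampler_info/checkpoint_history'
    fp.write_data('niterations', 100, path=path, append=True)
    fp.write_data('niterations', 250, path=path, append=True)
    # fp['sampler_info/checkpoint_history/niterations'][()] -> [100, 250]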
+ """ + path = '/'.join([self.sampler_group, 'checkpoint_history']) + try: + history = self[path] + except KeyError: + # assume history doesn't exist yet + self.create_group(path) + history = self[path] + # write the checkpoint time + current_time = time.time() + self.write_data('checkpoint_time', current_time, path=path, + append=True) + # get the amount of time since the last checkpoint + checkpoint_times = history['checkpoint_time'][()] + if len(checkpoint_times) == 1: + # this is the first checkpoint, get the run time for comparison + lasttime = self.run_start_time + else: + lasttime = checkpoint_times[-2] + # if a resume happened since the last checkpoint, use the resume + # time instad + if lasttime < self.run_start_time: + lasttime = self.run_start_time + self.write_data('checkpoint_dt', current_time-lasttime, path=path, + append=True) + # write any sampler-specific history + self._update_sampler_history() + + def _update_sampler_history(self): + """Writes sampler-specific history to the file. + + This function does nothing. Classes that inherit from it may override + it to add any extra information they would like written. This is + called by :py:func:`update_checkpoint_history`. + """ + pass + def validate(self): """Runs a validation test. diff --git a/pycbc/inference/sampler/base.py b/pycbc/inference/sampler/base.py index 7e34d3f9236..7341e240471 100644 --- a/pycbc/inference/sampler/base.py +++ b/pycbc/inference/sampler/base.py @@ -193,6 +193,7 @@ def setup_output(sampler, output_file, check_nsamples=True): with sampler.io(fn, "a") as fp: fp.write_command_line() fp.write_resume_point() + fp.write_run_start_time() # store sampler.checkpoint_file = checkpoint_file sampler.backup_file = backup_file diff --git a/pycbc/inference/sampler/base_mcmc.py b/pycbc/inference/sampler/base_mcmc.py index c931beffcb7..95194f3524a 100644 --- a/pycbc/inference/sampler/base_mcmc.py +++ b/pycbc/inference/sampler/base_mcmc.py @@ -607,6 +607,10 @@ def checkpoint(self): fp.acl = self.acl # write effective number of samples fp.write_effective_nsamples(self.effective_nsamples) + # write history + for fn in [self.checkpoint_file, self.backup_file]: + with self.io(fn, "a") as fp: + fp.update_checkpoint_history() # check validity logging.info("Validating checkpoint and backup files") checkpoint_valid = validate_checkpoint_files( From da1583312796ac11657a45931c35f51b2d126a93 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Fri, 10 Jul 2020 15:16:04 +0200 Subject: [PATCH 56/68] set correct dtype when reading epsie samples (#3367) --- pycbc/inference/io/base_multitemper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pycbc/inference/io/base_multitemper.py b/pycbc/inference/io/base_multitemper.py index 938feaf475b..549ec64d307 100644 --- a/pycbc/inference/io/base_multitemper.py +++ b/pycbc/inference/io/base_multitemper.py @@ -280,7 +280,8 @@ def read_raw_samples(fp, fields, alist.append(thisarr) maxiters = max(maxiters, thisarr.shape[-1]) # stack into a single array - arr = numpy.full((ntemps, len(chains), maxiters), numpy.nan) + arr = numpy.full((ntemps, len(chains), maxiters), numpy.nan, + dtype=fp[dset].dtype) for ii, thisarr in enumerate(alist): if thisarr is not None: arr[:, ii, :thisarr.shape[-1]] = thisarr From 28c84679650cb007b8e76a963545a84fdd5a3b33 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Fri, 10 Jul 2020 18:13:33 +0200 Subject: [PATCH 57/68] update support for ultranest (#3369) * cyclic support for ultranest, output logl * ws --- pycbc/inference/sampler/ultranest.py | 48 
++++++++++++++++++++++++---- pycbc/io/hdf.py | 2 +- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/pycbc/inference/sampler/ultranest.py b/pycbc/inference/sampler/ultranest.py index c3f09bb22f4..d0c5afe446c 100644 --- a/pycbc/inference/sampler/ultranest.py +++ b/pycbc/inference/sampler/ultranest.py @@ -22,7 +22,7 @@ # ============================================================================= # """ -This modules provides classes and functions for using the dynesty sampler +This modules provides classes and functions for using the ultranest sampler packages for parameter estimation. """ @@ -30,8 +30,10 @@ from __future__ import absolute_import import logging +import numpy from pycbc.inference.io.ultranest import UltranestFile +from pycbc.io.hdf import dump_state from .base import (BaseSampler, setup_output) from .base_cube import setup_calls @@ -70,10 +72,22 @@ def __init__(self, model, log_dir=None, import ultranest log_likelihood_call, prior_call = setup_calls(model, copy_prior=True) + # Check for cyclic boundaries + periodic = [] + cyclic = self.model.prior_distribution.cyclic + for param in self.variable_params: + if param in cyclic: + logging.info('Param: %s will be cyclic', param) + periodic.append(True) + else: + periodic.append(False) + self._sampler = ultranest.ReactiveNestedSampler( list(self.model.variable_params), log_likelihood_call, - prior_call, log_dir=log_dir, resume=True) + prior_call, log_dir=log_dir, + wrapped_params=periodic, + resume=True) if stepsampling: import ultranest.stepsampler @@ -150,8 +164,23 @@ def model_stats(self): @property def samples(self): - samples_dict = {p: self.result['samples'][:, i] for p, i in - zip(self.model.variable_params, range(self.ndim))} + from ultranest.utils import resample_equal + + # we'll do the resampling ourselves so we can pick up + # additional parameters + try: # Remove me on next ultranest release + wsamples = self.result['weighted_samples']['v'] + weights = self.result['weighted_samples']['w'] + logl = self.result['weighted_samples']['L'] + except KeyError: + wsamples = self.result['weighted_samples']['points'] + weights = self.result['weighted_samples']['weights'] + logl = self.result['weighted_samples']['logl'] + + wsamples = numpy.column_stack((wsamples, logl)) + params = list(self.model.variable_params) + ['loglikelihood'] + samples = resample_equal(wsamples, weights / weights.sum()) + samples_dict = {p: samples[:, i] for i, p in enumerate(params)} return samples_dict def write_results(self, filename): @@ -166,15 +195,20 @@ def write_results(self, filename): """ with self.io(filename, 'a') as fp: # write samples - fp.write_samples(self.samples, self.model.variable_params) + fp.write_samples(self.samples, self.samples.keys()) # write log evidence fp.write_logevidence(self.logz, self.logz_err) + # write full ultranest formatted results + dump_state(self.result, fp, + path='sampler_info', + dsetname='presult') + @property def logz(self): """ return bayesian evidence estimated by - dynesty sampler + ultranest sampler """ return self.result['logz'] @@ -182,6 +216,6 @@ def logz(self): def logz_err(self): """ return error in bayesian evidence estimated by - dynesty sampler + ultranest sampler """ return self.result['logzerr'] diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py index 83ef16d9965..4d1af88b133 100644 --- a/pycbc/io/hdf.py +++ b/pycbc/io/hdf.py @@ -1257,7 +1257,7 @@ def dump_pickle_to_hdf(memfp, fp, path=None, dsetname='state'): memfp.seek(0) bdata = np.frombuffer(memfp.read(), dtype='S1') if path is not 
None: - fp = fp[path] + dsetname = path + '/' + dsetname if dsetname not in fp: fp.create_dataset(dsetname, shape=bdata.shape, maxshape=(None,), dtype=bdata.dtype) From 63e5dbd838f447443f53a97411967e1e0ce4101a Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Fri, 10 Jul 2020 18:51:18 +0200 Subject: [PATCH 58/68] docs shouldn't run separately (#3370) --- tools/pycbc_test_suite.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pycbc_test_suite.sh b/tools/pycbc_test_suite.sh index 42034978f75..a2ddb0a0ca7 100755 --- a/tools/pycbc_test_suite.sh +++ b/tools/pycbc_test_suite.sh @@ -159,7 +159,7 @@ popd echo -e "\\n>> [`date`] Building documentation" -python setup.py build_gh_pages & +python setup.py build_gh_pages if test $? -ne 0 ; then echo -e " FAILED!" echo -e "---------------------------------------------------------" From 705d47db0cd93362ff43b8fa153c436eeab482c8 Mon Sep 17 00:00:00 2001 From: Bhooshan Uday Varsha Gadre Date: Sun, 12 Jul 2020 15:23:54 +0200 Subject: [PATCH 59/68] Fix for not having enough bkg for live (#3363) Co-authored-by: Bhooshan Gadre --- bin/pycbc_live | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/pycbc_live b/bin/pycbc_live index e54449b4c95..892ff41ef46 100755 --- a/bin/pycbc_live +++ b/bin/pycbc_live @@ -875,7 +875,7 @@ with ctx: bg_fn = os.path.join(args.output_background[1], bg_fn) with h5py.File(bg_fn, 'w') as bgf: for bg_ifos, bg_data, bg_time in bg_dists: - if args.output_background_n_loudest: + if args.output_background_n_loudest and (args.output_background_n_loudest < len(bg_data)-1): n_loudest = args.output_background_n_loudest assert (n_loudest > 0), "We can only store positive int loudest triggers." ds = bgf.create_dataset(','.join(sorted(bg_ifos)), From 4a5fb1f97932f89daf175c7d98d42e36b197def8 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Sun, 12 Jul 2020 23:21:16 +0200 Subject: [PATCH 60/68] Add gate and paint function (#3371) * add gate and paint function * add option to copy * Update gate.py Co-authored-by: Alex Nitz --- pycbc/strain/gate.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/pycbc/strain/gate.py b/pycbc/strain/gate.py index 90f9f02309d..eceefba1861 100644 --- a/pycbc/strain/gate.py +++ b/pycbc/strain/gate.py @@ -17,6 +17,8 @@ """ from . import strain +from scipy import linalg + def _gates_from_cli(opts, gate_opt): """Parses the given `gate_opt` into something understandable by @@ -137,3 +139,41 @@ def add_gate_option_group(parser): "prior to FFT-ing the data for PSD " "estimation.") return gate_group + + +def gate_and_paint(data, lindex, rindex, invpsd, copy=True): + """Gates and in-paints data. + + Parameters + ---------- + data : TimeSeries + The data to gate. + lindex : int + The start index of the gate. + rindex : int + The end index of the gate. + invpsd : FrequencySeries + The inverse of the PSD. + copy : bool, optional + Copy the data before applying the gate. Otherwise, the gate will + be applied in-place. Default is True. + + Returns + ------- + TimeSeries : + The gated and in-painted time series. 
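A brief usage sketch for the new function (illustrative only; ``strain``, its
inverse PSD ``invpsd`` and the gate GPS times are assumed to already exist and
to be consistently sampled)::

    from pycbc.strain.gate import gate_and_paint

    # convert the gate edges from GPS times to sample indices
    lindex = int((float(gate_start) - float(strain.start_time)) / strain.delta_t)
    rindex = int((float(gate_end) - float(strain.start_time)) / strain.delta_t)

    # zero the data in [lindex, rindex) and in-paint it using the inverse PSD
    gated = gate_and_paint(strain, lindex, rindex, invpsd, copy=True)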
+ """ + # Copy the data and zero inside the hole + if copy: + data = data.copy() + data[lindex:rindex] = 0 + + # get the over-whitened gated data + tdfilter = invpsd.astype('complex').to_timeseries() * invpsd.delta_t + owhgated_data = (data.to_frequencyseries() * invpsd).to_timeseries() + + # remove the projection into the null space + proj = linalg.solve_toeplitz(tdfilter[:(rindex - lindex)], + owhgated_data[lindex:rindex]) + data[lindex:rindex] -= proj + return data From f2d3d755edc444d2b9a24c925c5725aecdabfd85 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Mon, 13 Jul 2020 00:55:41 +0200 Subject: [PATCH 61/68] Vectorize joint distribution (#3352) * try to speed up rvs * speed up rvs by doing batch draws * fix typos --- pycbc/distributions/joint.py | 132 +++++++++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 29 deletions(-) diff --git a/pycbc/distributions/joint.py b/pycbc/distributions/joint.py index f0c7b7eb2b8..1727c067559 100644 --- a/pycbc/distributions/joint.py +++ b/pycbc/distributions/joint.py @@ -16,7 +16,7 @@ """ import logging import numpy -from pycbc.io import record +from pycbc.io.record import FieldArray class JointDistribution(object): """ @@ -116,12 +116,10 @@ def __init__(self, variable_args, *distributions, **kwargs): draw = dist.rvs(n_test_samples) for param in dist.params: samples[param] = draw[param] - samples = record.FieldArray.from_kwargs(**samples) + samples = FieldArray.from_kwargs(**samples) # evaluate constraints - result = numpy.ones(samples.shape, dtype=bool) - for constraint in self._constraints: - result &= constraint(samples) + result = self.contains(samples) # set new scaling factor for prior to be # the fraction of acceptances in random sampling of entire space @@ -158,41 +156,117 @@ def apply_boundary_conditions(self, **params): params.update(dist.apply_boundary_conditions(**params)) return params + @staticmethod + def _ensure_fieldarray(params): + """Ensures the given params are a ``FieldArray``. + + Parameters + ---------- + params : dict, FieldArray, numpy.record, or numpy.ndarray + If the given object is a dict, it will be converted to a + FieldArray. + + Returns + ------- + params : FieldArray + The given values as a FieldArray. + return_atomic : bool + Whether or not functions run on the parameters should be returned + as atomic types or not. + """ + if isinstance(params, dict): + return_atomic = not any(isinstance(val, numpy.ndarray) + for val in params.values()) + params = FieldArray.from_kwargs(**params) + elif isinstance(params, numpy.record): + return_atomic = True + params = FieldArray.from_records(tuple(params), + names=params.dtype.names) + elif isinstance(params, numpy.ndarray): + return_atomic = False + params = params.view(type=FieldArray) + elif isinstance(params, FieldArray): + return_atomic = False + else: + raise ValueError("params must be either dict, FieldArray, " + "record, or structured array") + return params, return_atomic + + def contains(self, params): + """Evaluates whether the given parameters satisfy the constraints. + + Parameters + ---------- + params : dict, FieldArray, numpy.record, or numpy.ndarray + The parameter values to evaluate. + + Returns + ------- + (array of) bool : + If params was an array, or if params a dictionary and one or more + of the parameters are arrays, will return an array of booleans. + Otherwise, a boolean. 
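A hedged usage sketch of the scalar/array behaviour described above (assuming the Uniform and JointDistribution classes exported by pycbc.distributions; the parameter names and bounds are made up)::

    import numpy
    from pycbc.distributions import Uniform, JointDistribution

    prior = JointDistribution(['mass1', 'mass2'],
                              Uniform(mass1=(10., 80.), mass2=(10., 80.)))

    # dict of scalars -> a single bool
    print(prior.contains({'mass1': 20., 'mass2': 15.}))

    # dict of arrays -> one bool per sample
    print(prior.contains({'mass1': numpy.array([20., 5.]),
                          'mass2': numpy.array([15., 30.])}))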
+ """ + params, return_atomic = self._ensure_fieldarray(params) + # convert params to a field array if it isn't one + result = numpy.ones(params.shape, dtype=bool) + for constraint in self._constraints: + result &= constraint(params) + if return_atomic: + result = result.item() + return result + def __call__(self, **params): """Evaluate joint distribution for parameters. """ - for constraint in self._constraints: - if not constraint(params): - return -numpy.inf - return sum([d(**params) - for d in self.distributions]) - self._logpdf_scale + # convert to Field array + parray, return_atomic = self._ensure_fieldarray(params) + # check if statisfies constraints + isin = self.contains(parray) + if not isin.any(): + if return_atomic: + out = -numpy.inf + else: + out = numpy.full(parray.shape, -numpy.inf) + return out + # evaulate + # note: this step may fail if arrays of values were provided, as + # not all distributions are vectorized currently + logps = numpy.array([d(**params) for d in self.distributions]) + logp = logps.sum(axis=0) + numpy.log(isin.astype(float)) + if return_atomic: + logp = logp.item() + return logp - self._logpdf_scale def rvs(self, size=1): """ Rejection samples the parameter space. """ - # create output FieldArray - out = record.FieldArray(size, dtype=[(arg, float) - for arg in self.variable_args]) - + dtype = [(arg, float) for arg in self.variable_args] + out = FieldArray(size, dtype=dtype) # loop until enough samples accepted - n = 0 - while n < size: - - # draw samples - samples = {} + remaining = size + ndraw = size + while remaining: + # scratch space for evaluating constraints + scratch = FieldArray(ndraw, dtype=dtype) for dist in self.distributions: - draw = dist.rvs(1) + # drawing samples from the distributions is generally faster + # then evaluating constrants, so we'll always draw the full + # size, even if that gives us more points than we need + draw = dist.rvs(size=ndraw) for param in dist.params: - samples[param] = draw[param][0] - vals = numpy.array([samples[arg] for arg in self.variable_args]) - - # determine if all parameter values are in prior space - # if they are then add to output - if self(**dict(zip(self.variable_args, vals))) > -numpy.inf: - out[n] = vals - n += 1 - + scratch[param] = draw[param] + # apply any constraints + keep = self.contains(scratch) + nkeep = keep.sum() + kmin = size - remaining + kmax = min(nkeep, remaining) + out[kmin:kmin+kmax] = scratch[keep][:kmax] + remaining = max(0, remaining - nkeep) + # to try to speed up next go around, we'll increase the draw + # size by the fraction of values that were kept, but cap at 1e6 + ndraw = int(min(1e6, ndraw * numpy.ceil(ndraw / (nkeep + 1.)))) return out @property From 5edf107b89d0bd5158441882443281c96c8414b2 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Mon, 13 Jul 2020 10:25:14 +0200 Subject: [PATCH 62/68] simplify running pycbc inference with mpi (#3373) * fix typo * simplify mpi launch process * simplify pool calls * fixes * fixes * Update dynesty.py * cc * detected case where not launched --- bin/inference/pycbc_inference | 11 ++++--- pycbc/inference/sampler/base_cube.py | 15 ++++------ pycbc/inference/sampler/dynesty.py | 3 +- pycbc/inference/sampler/emcee.py | 10 ++----- pycbc/inference/sampler/emcee_pt.py | 21 +++++-------- pycbc/inference/sampler/epsie.py | 12 ++++---- pycbc/pool.py | 44 ++++++++++++++++++++++++---- 7 files changed, 66 insertions(+), 50 deletions(-) diff --git a/bin/inference/pycbc_inference b/bin/inference/pycbc_inference index 0d653d4a01f..4545ae0646a 100644 
--- a/bin/inference/pycbc_inference +++ b/bin/inference/pycbc_inference @@ -27,7 +27,7 @@ import numpy import pycbc from pycbc import (distributions, transforms, fft, - opt, scheme) + opt, scheme, pool) from pycbc.waveform import generator from pycbc import __version__ @@ -80,9 +80,8 @@ opts = parser.parse_args() # setup log # If we're running in MPI mode, only allow the parent to print -if opts.use_mpi: - from mpi4py import MPI - rank = MPI.COMM_WORLD.Get_rank() +use_mpi, size, rank = pycbc.pool.use_mpi(opts.use_mpi, log=False) +if use_mpi: opts.verbose &= rank == 0 pycbc.init_logging(opts.verbose) @@ -122,7 +121,7 @@ with ctx: cp.get('sampler', 'checkpoint-signal'))) # create an empty output file to keep condor happy open(opts.output_file, 'a').close() - + logging.info("Setting up model") # construct class that will return the natural logarithm of likelihood @@ -141,7 +140,7 @@ with ctx: # Run the sampler sampler.run() - # Finalize the output + # Finalize the output sampler.finalize() if condor_ckpt: diff --git a/pycbc/inference/sampler/base_cube.py b/pycbc/inference/sampler/base_cube.py index 35fc06b6f46..6b397208e12 100644 --- a/pycbc/inference/sampler/base_cube.py +++ b/pycbc/inference/sampler/base_cube.py @@ -38,21 +38,16 @@ def call_global_logprior(cube): return models._global_instance.prior_transform(cube) -def setup_calls(model, nprocesses=1, - loglikelihood_function=None, copy_prior=False): +def setup_calls(model, loglikelihood_function=None, copy_prior=False): """ Configure calls for MPI support """ model_call = CubeModel(model, loglikelihood_function, copy_prior=copy_prior) - if nprocesses > 1: - # these are used to help paralleize over multiple cores / MPI - models._global_instance = model_call - log_likelihood_call = call_global_loglikelihood - prior_call = call_global_logprior - else: - prior_call = model_call.prior_transform - log_likelihood_call = model_call.log_likelihood + # these are used to help paralleize over multiple cores / MPI + models._global_instance = model_call + log_likelihood_call = call_global_loglikelihood + prior_call = call_global_logprior return log_likelihood_call, prior_call diff --git a/pycbc/inference/sampler/dynesty.py b/pycbc/inference/sampler/dynesty.py index 9857ff239b4..44ae064e6e8 100644 --- a/pycbc/inference/sampler/dynesty.py +++ b/pycbc/inference/sampler/dynesty.py @@ -80,7 +80,6 @@ def __init__(self, model, nlive, nprocesses=1, self.model = model log_likelihood_call, prior_call = setup_calls( model, - nprocesses=nprocesses, loglikelihood_function=loglikelihood_function) # Set up the pool self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) @@ -97,7 +96,7 @@ def __init__(self, model, nlive, nprocesses=1, if self.checkpoint_time_interval: self.run_with_checkpoint = True if self.maxcall is None: - self.maxcall = 5000 * nprocesses + self.maxcall = 5000 * self.pool.size logging.info("Checkpointing enabled, will verify every %s calls" " and try to checkpoint every %s seconds", self.maxcall, self.checkpoint_time_interval) diff --git a/pycbc/inference/sampler/emcee.py b/pycbc/inference/sampler/emcee.py index cf8c4875fac..05e06e0a490 100644 --- a/pycbc/inference/sampler/emcee.py +++ b/pycbc/inference/sampler/emcee.py @@ -79,14 +79,10 @@ def __init__(self, model, nwalkers, logpost_function = 'logposterior' model_call = models.CallModel(model, logpost_function) - # Set up the pool - if nprocesses > 1: - # these are used to help paralleize over multiple cores / MPI - models._global_instance = model_call - model_call = models._call_global_model + 
# these are used to help paralleize over multiple cores / MPI + models._global_instance = model_call + model_call = models._call_global_model pool = choose_pool(mpi=use_mpi, processes=nprocesses) - if pool is not None: - pool.count = nprocesses # set up emcee self.nwalkers = nwalkers diff --git a/pycbc/inference/sampler/emcee_pt.py b/pycbc/inference/sampler/emcee_pt.py index 82d0120118e..5da34c798e8 100644 --- a/pycbc/inference/sampler/emcee_pt.py +++ b/pycbc/inference/sampler/emcee_pt.py @@ -83,24 +83,17 @@ def __init__(self, model, ntemps, nwalkers, betas=None, model_call = models.CallModel(model, loglikelihood_function, return_all_stats=False) - # Set up the pool - if nprocesses > 1: - # these are used to help paralleize over multiple cores / MPI - models._global_instance = model_call - model_call = models._call_global_model - prior_call = models._call_global_model_logprior - else: - prior_call = models.CallModel(model, 'logprior', - return_all_stats=False) - pool = choose_pool(mpi=use_mpi, processes=nprocesses) - if pool is not None: - pool.count = nprocesses - self.pool = pool + # these are used to help paralleize over multiple cores / MPI + models._global_instance = model_call + model_call = models._call_global_model + prior_call = models._call_global_model_logprior + self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) + # construct the sampler: PTSampler needs the likelihood and prior # functions separately ndim = len(model.variable_params) self._sampler = emcee.PTSampler(ntemps, nwalkers, ndim, - model_call, prior_call, pool=pool, + model_call, prior_call, pool=self.pool, betas=betas) self.nwalkers = nwalkers self._ntemps = ntemps diff --git a/pycbc/inference/sampler/epsie.py b/pycbc/inference/sampler/epsie.py index cde12adc428..ea66230d537 100644 --- a/pycbc/inference/sampler/epsie.py +++ b/pycbc/inference/sampler/epsie.py @@ -105,14 +105,14 @@ def __init__(self, model, nchains, ntemps=None, betas=None, self.model = model # create a wrapper for calling the model model_call = _EpsieCallModel(model, loglikelihood_function) + + # these are used to help paralleize over multiple cores / MPI + models._global_instance = model_call + model_call = models._call_global_model + # Set up the pool - if nprocesses > 1: - # these are used to help paralleize over multiple cores / MPI - models._global_instance = model_call - model_call = models._call_global_model pool = choose_pool(mpi=use_mpi, processes=nprocesses) - if pool is not None: - pool.count = nprocesses + # initialize the sampler self._sampler = ParallelTemperedSampler( model.sampling_params, model_call, nchains, betas=betas, diff --git a/pycbc/pool.py b/pycbc/pool.py index 5f4d76478bc..efbf55ec939 100644 --- a/pycbc/pool.py +++ b/pycbc/pool.py @@ -7,6 +7,7 @@ import types import signal import atexit +import logging def is_main_process(): """ Check if this is the main control process and may handle one time tasks @@ -121,23 +122,56 @@ def broadcast(self, fcn, args): def map(self, f, items): return [f(a) for a in items] +def use_mpi(require_mpi=False, log=True): + """ Get whether MPI is enabled and if so the current size and rank + """ + use_mpi = False + size = rank = 0 + try: + from mpi4py import MPI + comm = MPI.COMM_WORLD + size = comm.Get_size() + rank = comm.Get_rank() + if size > 1: + use_mpi = True + if log: + logging.info('Running under mpi with size: %s, rank: %s', + size, rank) + except ImportError as e: + if require_mpi: + print(e) + raise ValueError("Failed to load mpi, ensure mpi4py is installed") + return use_mpi, 
size, rank + def choose_pool(processes, mpi=False): - if mpi: + """ Get processing pool + """ + do_mpi, size, rank = use_mpi(require_mpi=mpi) + if do_mpi: try: import schwimmbad - pool = schwimmbad.choose_pool(mpi=mpi, - processes=processes) + pool = schwimmbad.choose_pool(mpi=do_mpi, + processes=(size - 1)) pool.broadcast = types.MethodType(_dummy_broadcast, pool) atexit.register(pool.close) + + if processes: + logging.info('NOTE: that for MPI process size determined by ' + 'MPI launch size, not the processes argument') + + if do_mpi and not mpi: + logging.info('NOTE: using MPI as this process was launched' + 'under MPI') except ImportError: raise ValueError("Failed to start up an MPI pool, " - "install mpi4py / schwimmbadd") + "install mpi4py / schwimmbad") elif processes == 1: pool = SinglePool() else: pool = BroadcastPool(processes) pool.size = processes + if size: + pool.size = size return pool - From d3358f597dcffabb1549aaecefda7276bbf1528c Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Mon, 13 Jul 2020 10:25:47 +0200 Subject: [PATCH 63/68] minor fixes to enable ultranest using MPI (#3374) * fix typo * simplify mpi launch process * simplify pool calls * fixes * fixes * Update dynesty.py * cc * detected case where not launched * enable multicore support for ultranest * cc --- pycbc/inference/sampler/ultranest.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pycbc/inference/sampler/ultranest.py b/pycbc/inference/sampler/ultranest.py index d0c5afe446c..66dc2dabdcd 100644 --- a/pycbc/inference/sampler/ultranest.py +++ b/pycbc/inference/sampler/ultranest.py @@ -29,11 +29,13 @@ from __future__ import absolute_import +import sys import logging import numpy from pycbc.inference.io.ultranest import UltranestFile from pycbc.io.hdf import dump_state +from pycbc.pool import use_mpi from .base import (BaseSampler, setup_output) from .base_cube import setup_calls @@ -101,8 +103,13 @@ def __init__(self, model, log_dir=None, self.result = None self.kwargs = kwargs # Keywords for the run method of ultranest + do_mpi, _, rank = use_mpi() + self.main = (not do_mpi) or (rank == 0) + def run(self): self.result = self._sampler.run(**self.kwargs) + if not self.main: + sys.exit(0) self._sampler.print_results() if self.enable_plots: @@ -144,7 +151,10 @@ def from_config(cls, cp, model, output_file=None, **kwds): value = cp.get('sampler', opt_name) skeys[opt_name] = opts[opt_name](value) inst = cls(model, **skeys) - setup_output(inst, output_file) + + do_mpi, _, rank = use_mpi() + if not do_mpi or (rank == 0): + setup_output(inst, output_file) return inst def checkpoint(self): From b7045dc3aee8243648f0991c826ef0bba5002949 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Mon, 13 Jul 2020 14:31:40 +0200 Subject: [PATCH 64/68] update gate time series method to allow selection of gating type (#3372) * update gate method to include selectable type * ws * comments * cc * com --- pycbc/strain/gate.py | 6 ++-- pycbc/types/frequencyseries.py | 52 ++++++++++++++++++---------------- pycbc/types/timeseries.py | 47 ++++++++++++++++++++++++++---- 3 files changed, 73 insertions(+), 32 deletions(-) diff --git a/pycbc/strain/gate.py b/pycbc/strain/gate.py index eceefba1861..b0dce62de52 100644 --- a/pycbc/strain/gate.py +++ b/pycbc/strain/gate.py @@ -16,8 +16,8 @@ """ Functions for applying gates to data. """ -from . import strain from scipy import linalg +from . 
import strain def _gates_from_cli(opts, gate_opt): @@ -163,6 +163,8 @@ def gate_and_paint(data, lindex, rindex, invpsd, copy=True): TimeSeries : The gated and in-painted time series. """ + # Uses the hole-filling method of + # https://arxiv.org/pdf/1908.05644.pdf # Copy the data and zero inside the hole if copy: data = data.copy() @@ -171,7 +173,7 @@ def gate_and_paint(data, lindex, rindex, invpsd, copy=True): # get the over-whitened gated data tdfilter = invpsd.astype('complex').to_timeseries() * invpsd.delta_t owhgated_data = (data.to_frequencyseries() * invpsd).to_timeseries() - + # remove the projection into the null space proj = linalg.solve_toeplitz(tdfilter[:(rindex - lindex)], owhgated_data[lindex:rindex]) diff --git a/pycbc/types/frequencyseries.py b/pycbc/types/frequencyseries.py index d8c7ea7f634..dc0419ad3d7 100644 --- a/pycbc/types/frequencyseries.py +++ b/pycbc/types/frequencyseries.py @@ -72,7 +72,7 @@ def __init__(self, initial_array, delta_f=None, epoch="", dtype=None, copy=True) else: epoch = _lal.LIGOTimeGPS(0) elif epoch is not None: - try: + try: if isinstance(epoch, _numpy.generic): # In python3 lal LIGOTimeGPS will not work on numpy # types as input. A quick google on how to generically @@ -170,7 +170,7 @@ def delta_t(self): @property def sample_rate(self): - """Return the sample rate this would have in the time domain. This + """Return the sample rate this would have in the time domain. This assumes even length time series! """ return (len(self) - 1) * self.delta_f * 2.0 @@ -382,8 +382,8 @@ def save(self, path, group=None, ifo='P1'): ---------- path: string Destination file path. Must end with either .hdf, .npy or .txt. - - group: string + + group: string Additional name for internal storage use. Ex. hdf storage uses this as the key value. @@ -436,23 +436,27 @@ def save(self, path, group=None, ifo='P1'): raise ValueError('Path must end with .npy, .txt, .xml, .xml.gz ' 'or .hdf') + def to_frequencyseries(self): + """ Return frequency series """ + return self + @_noreal def to_timeseries(self, delta_t=None): """ Return the Fourier transform of this time series. Note that this assumes even length time series! - + Parameters ---------- delta_t : {None, float}, optional - The time resolution of the returned series. By default the - resolution is determined by length and delta_f of this frequency + The time resolution of the returned series. By default the + resolution is determined by length and delta_f of this frequency series. - + Returns - ------- - TimeSeries: - The inverse fourier transform of this frequency series. + ------- + TimeSeries: + The inverse fourier transform of this frequency series. """ from pycbc.fft import ifft from pycbc.types import TimeSeries, real_same_precision_as @@ -463,7 +467,7 @@ def to_timeseries(self, delta_t=None): # add 0.5 to round integer tlen = int(1.0 / self.delta_f / delta_t + 0.5) flen = int(tlen / 2 + 1) - + if flen < len(self): raise ValueError("The value of delta_t (%s) would be " "undersampled. 
Maximum delta_t " @@ -471,11 +475,11 @@ def to_timeseries(self, delta_t=None): if not delta_t: tmp = self else: - tmp = FrequencySeries(zeros(flen, dtype=self.dtype), + tmp = FrequencySeries(zeros(flen, dtype=self.dtype), delta_f=self.delta_f, epoch=self.epoch) tmp[:len(self)] = self[:] - - f = TimeSeries(zeros(tlen, + + f = TimeSeries(zeros(tlen, dtype=real_same_precision_as(self)), delta_t=delta_t) ifft(tmp, f) @@ -485,11 +489,11 @@ def to_timeseries(self, delta_t=None): def cyclic_time_shift(self, dt): """Shift the data and timestamps by a given number of seconds - Shift the data and timestamps in the time domain a given number of - seconds. To just change the time stamps, do ts.start_time += dt. + Shift the data and timestamps in the time domain a given number of + seconds. To just change the time stamps, do ts.start_time += dt. The time shift may be smaller than the intrinsic sample rate of the data. Note that data will be cycliclly rotated, so if you shift by 2 - seconds, the final 2 seconds of your data will now be at the + seconds, the final 2 seconds of your data will now be at the beginning of the data set. Parameters @@ -544,7 +548,7 @@ def match(self, other, psd=None, other.resize(int(other.sample_rate * self.duration)) other = other.to_frequencyseries() - + if len(other) != len(self): other = other.copy() other.resize(len(self)) @@ -567,7 +571,7 @@ def load_frequencyseries(path, group=None): path: string source file path. Must end with either .npy or .txt. - group: string + group: string Additional name for internal storage use. Ex. hdf storage uses this as the key value. @@ -575,10 +579,10 @@ def load_frequencyseries(path, group=None): ------ ValueError If path does not end in .npy or .txt. - """ + """ ext = _os.path.splitext(path)[1] if ext == '.npy': - data = _numpy.load(path) + data = _numpy.load(path) elif ext == '.txt': data = _numpy.loadtxt(path) elif ext == '.hdf': @@ -586,12 +590,12 @@ def load_frequencyseries(path, group=None): f = h5py.File(path, 'r') data = f[key][:] series = FrequencySeries(data, delta_f=f[key].attrs['delta_f'], - epoch=f[key].attrs['epoch']) + epoch=f[key].attrs['epoch']) f.close() return series else: raise ValueError('Path must end with .npy, .hdf, or .txt') - + if data.ndim == 2: delta_f = (data[-1][0] - data[0][0]) / (len(data)-1) epoch = _lal.LIGOTimeGPS(data[0][0]) diff --git a/pycbc/types/timeseries.py b/pycbc/types/timeseries.py index a40a092c192..b2fb44508f9 100644 --- a/pycbc/types/timeseries.py +++ b/pycbc/types/timeseries.py @@ -478,25 +478,56 @@ def psd(self, segment_duration, **kwds): seg_stride=seg_stride, **kwds) - def gate(self, time, zero_width=0.25, taper_width=0.25): + def gate(self, time, window=0.25, method='taper', copy=True, + taper_width=0.25, invpsd=None): """ Gate out portion of time series Parameters ---------- time: float Central time of the gate in seconds - zero_width: float - Half-length in seconds of zeros around gate. + window: float + Half-length in seconds to remove data around gate time. + method: str + Method to apply gate, options are 'hard', 'taper', and 'paint'. + copy: bool + If False, do operations inplace to this time series, else return + new time series. taper_width: float - Lenght of tapering region on either side of zero'd data + Length of tapering region on either side of excized data. Only + applies to the taper gating method. + invpsd: pycbc.types.FrequencySeries + The inverse PSD to use for painting method. If not given, + a PSD is generated using default settings. 
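A short, hedged sketch of how the three gating methods might be called on a time series (fake white-noise data, hypothetical gate time and width)::

    import numpy
    from pycbc.types import TimeSeries

    ts = TimeSeries(numpy.random.normal(size=4096), delta_t=1.0/4096, epoch=0)

    hard = ts.gate(0.5, window=0.1, method='hard')    # zero out 0.4 s to 0.6 s
    taper = ts.gate(0.5, window=0.1, method='taper')  # zeros plus smooth tapers
    # method='paint' in-fills the hole instead; if no invpsd is supplied a PSD
    # is estimated from the data itself, so longer data stretches are advisable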
Returns ------- data: pycbc.types.TimeSeris Gated time series """ - from pycbc.strain import gate_data - return gate_data(self.copy(), [(time, zero_width, taper_width)]) + data = self.copy() if copy else self + if method == 'taper': + from pycbc.strain import gate_data + return gate_data(data, [(time, window, taper_width)]) + elif method == 'paint': + # Uses the hole-filling method of + # https://arxiv.org/pdf/1908.05644.pdf + from pycbc.strain.gate import gate_and_paint + if invpsd is None: + # These are some bare minimum settings, normally you + # should probably provide a psd + invpsd = 1. / self.filter_psd(self.duration/32, self.delta_f, 0) + lindex = int((time - window - self.start_time) / self.delta_t) + rindex = lindex + int(2 * window / self.delta_t) + lindex = lindex if lindex >= 0 else 0 + rindex = rindex if rindex <= len(self) else len(self) + return gate_and_paint(data, lindex, rindex, invpsd, copy=False) + elif method == 'hard': + tslice = data.time_slice(time - window, time + window) + tslice[:] = 0 + return data + else: + raise ValueError('Invalid method name: {}'.format(method)) def filter_psd(self, segment_duration, delta_f, flow): """ Calculate the power spectral density of this time series. @@ -797,6 +828,10 @@ def save(self, path, group = None): else: raise ValueError('Path must end with .npy, .txt or .hdf') + def to_timeseries(self): + """ Return time series""" + return self + @_nocomplex def to_frequencyseries(self, delta_f=None): """ Return the Fourier transform of this time series From ef02479d2b4df080345d3f0197aaef8647e6db0f Mon Sep 17 00:00:00 2001 From: JulianWesterweck <31701998+JulianWesterweck@users.noreply.github.com> Date: Mon, 13 Jul 2020 17:13:43 +0200 Subject: [PATCH 65/68] Update format_lmns in ringdown module. (#3335) --- pycbc/waveform/ringdown.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pycbc/waveform/ringdown.py b/pycbc/waveform/ringdown.py index ddaeb2d03a1..612811192be 100644 --- a/pycbc/waveform/ringdown.py +++ b/pycbc/waveform/ringdown.py @@ -80,12 +80,16 @@ def format_lmns(lmns): # In Python3 this might be "[b'221', b'331']" if isinstance(lmns, str): # strip off brackets and convert to list - lmns = lmns.strip('[]').split(',') + for char in ["[", "]", "'", " ", "b"]: + lmns = lmns.replace(char,'') + lmns = lmns.split(',') # Case 2: a list with only one string with a list ["221', '331"] # In Python3 this might be ["b221', b'331"] elif (len(lmns) == 1 and isinstance(lmns[0], str) and len(lmns[0]) > 3): + for char in ["[", "]", "'", " ", "b"]: + lmns[0] = lmns[0].replace(char,'') lmns = lmns[0].split(',') out = [] From e0c4e6296567131e9b2f20a8816ec19275cee526 Mon Sep 17 00:00:00 2001 From: Alex Nitz Date: Mon, 13 Jul 2020 17:15:03 +0200 Subject: [PATCH 66/68] Update pool.py (#3375) --- pycbc/pool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pycbc/pool.py b/pycbc/pool.py index efbf55ec939..0799ad1aec0 100644 --- a/pycbc/pool.py +++ b/pycbc/pool.py @@ -134,9 +134,9 @@ def use_mpi(require_mpi=False, log=True): rank = comm.Get_rank() if size > 1: use_mpi = True - if log: - logging.info('Running under mpi with size: %s, rank: %s', - size, rank) + if log: + logging.info('Running under mpi with size: %s, rank: %s', + size, rank) except ImportError as e: if require_mpi: print(e) From 4cd71b6c355bc6e34e890ab75b3832b4c6c9b679 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Mon, 13 Jul 2020 23:58:24 +0200 Subject: [PATCH 67/68] make sure sampling parameters keep the same order (#3377) --- 
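The one-line reason for the sort: Python sets do not iterate in a stable order across interpreter runs, so two processes reading the same configuration could otherwise disagree on the parameter ordering. For example (hypothetical parameter names)::

    params = {'dec', 'logq', 'mchirp', 'ra'}
    print(sorted(params))  # always ['dec', 'logq', 'mchirp', 'ra'], run after run
    # list(params) on its own may come out in a different order in a new process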
pycbc/inference/models/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pycbc/inference/models/base.py b/pycbc/inference/models/base.py index ee2bf8a0b9d..1b4926af993 100644 --- a/pycbc/inference/models/base.py +++ b/pycbc/inference/models/base.py @@ -120,6 +120,8 @@ def __init__(self, variable_params, sampling_params, if arg not in replace_parameters] # add the sampling parameters self.sampling_params += sampling_params + # sort to make sure we have a consistent order + self.sampling_params.sort() self.sampling_transforms = sampling_transforms def logjacobian(self, **params): @@ -272,7 +274,7 @@ def read_sampling_params_from_config(cp, section_group=None, map_args = cp.get(section, args) sampling_params.update(set(map(str.strip, map_args.split(',')))) replaced_params.update(set(map(str.strip, args.split(',')))) - return list(sampling_params), list(replaced_params) + return sorted(sampling_params), sorted(replaced_params) # From 9177491f24a52b3fd10a7a1e31f8451dd297d599 Mon Sep 17 00:00:00 2001 From: Ian Harry Date: Tue, 14 Jul 2020 11:42:45 +0100 Subject: [PATCH 68/68] Consolidate resolve url (#3361) * Implement a resolve_url_to_file function * Update to add local CVMFS registry * Typo fix * Typo fix and allow localhost * Attrs is needed so rethinking needed * Typoe fix * Another typo * Always need local PFN * All CVMFS URLs * Numerous codeclimate issues --- pycbc/workflow/core.py | 91 ++++++++++++++++++++++++++++++++----- pycbc/workflow/grb_utils.py | 14 +++--- pycbc/workflow/injection.py | 25 +++++----- pycbc/workflow/psdfiles.py | 19 +++----- pycbc/workflow/tmpltbank.py | 22 ++++----- 5 files changed, 117 insertions(+), 54 deletions(-) diff --git a/pycbc/workflow/core.py b/pycbc/workflow/core.py index 55200a44762..6c398157413 100644 --- a/pycbc/workflow/core.py +++ b/pycbc/workflow/core.py @@ -366,8 +366,6 @@ def add_ini_opts(self, cp, sec): ifo = split_path[0] path = split_path[1] - curr_lfn = os.path.basename(path) - # If the file exists make sure to use the # fill path as a file:// URL if os.path.isfile(path): @@ -376,15 +374,7 @@ def add_ini_opts(self, cp, sec): else: curr_pfn = path - if curr_lfn in file_input_from_config_dict.keys(): - file_pfn = file_input_from_config_dict[curr_lfn][2] - assert(file_pfn == curr_pfn) - curr_file = file_input_from_config_dict[curr_lfn][1] - else: - local_file_path = resolve_url(curr_pfn) - curr_file = File.from_path(local_file_path) - tuple_val = (local_file_path, curr_file, curr_pfn) - file_input_from_config_dict[curr_lfn] = tuple_val + curr_file = resolve_url_to_file(curr_pfn) self.common_input_files.append(curr_file) if ifo: self.common_raw_options.append(ifo + ':') @@ -2006,6 +1996,85 @@ def __str__(self): msg += "The failed command has been printed in %s ." %(self.cmdFile) return msg +def resolve_url_to_file(curr_pfn, attrs=None): + """ + Resolves a PFN into a workflow.File object. + + This function will resolve a PFN to a workflow.File object. If a File + object already exists for that PFN that will be returned, otherwise a new + object is returned. We will implement default site schemes here as needed, + for example cvfms paths will be added to the osg and nonfsio sites in + addition to local. If the LFN is a duplicate of an existing one, but with a + different PFN an AssertionError is raised. The attrs keyword-argument can + be used to specify attributes of a file. All files have 4 possible + attributes. 
A list of ifos, an identifying string - usually used to give + the name of the executable that created the file, a segmentlist over which + the file is valid and tags specifying particular details about those files. + If attrs['ifos'] is set it will be used as the ifos, otherwise this will + default to ['H1', 'K1', 'L1', 'V1']. If attrs['exe_name'] is given this + will replace the "exe_name" sent to File.__init__ otherwise 'INPUT' will + be given. segs will default to [[1,2000000000]] unless overridden with + attrs['segs']. tags will default to an empty list unless overriden + with attrs['tag']. If attrs is None it will be ignored and all defaults + will be used. It is emphasized that these attributes are for the most part + not important with input files. Exceptions include things like input + template banks, where ifos and valid times will be checked in the workflow + and used in the naming of child job output files. + """ + cvmfsstr1 = 'file:///cvmfs/' + cvmfsstr2 = 'file://localhost/cvmfs/' + cvmfsstrs = (cvmfsstr1, cvmfsstr2) + + # Get LFN + urlp = urllib.parse.urlparse(curr_pfn) + curr_lfn = os.path.basename(urlp.path) + + # Does this already exist as a File? + if curr_lfn in file_input_from_config_dict.keys(): + file_pfn = file_input_from_config_dict[curr_lfn][2] + # If the PFNs are different, but LFNs are the same then fail. + assert(file_pfn == curr_pfn) + curr_file = file_input_from_config_dict[curr_lfn][1] + else: + # Use resolve_url to download file/symlink as appropriate + local_file_path = resolve_url(curr_pfn) + # Create File object with default local path + # To do this we first need to check the attributes + if attrs and 'ifos' in attrs: + ifos = attrs['ifos'] + else: + ifos = ['H1', 'K1', 'L1', 'V1'] + if attrs and 'exe_name' in attrs: + exe_name = attrs['exe_name'] + else: + exe_name = 'INPUT' + if attrs and 'segs' in attrs: + segs = attrs['segs'] + else: + segs = segments.segment([1, 2000000000]) + if attrs and 'tags' in attrs: + tags = attrs['tags'] + else: + tags = [] + + curr_file = File(ifos, exe_name, segs, local_file_path, tags=tags) + pfn_local = urljoin('file:', pathname2url(local_file_path)) + curr_file.PFN(pfn_local, 'local') + # Add other PFNs for nonlocal sites as needed. + # This block could be extended as needed + if curr_pfn.startswith(cvmfsstrs): + curr_file.PFN(curr_pfn, site='osg') + curr_file.PFN(curr_pfn, site='nonfsio') + # Also register the CVMFS PFN with the local site. We want to + # prefer this, and symlink from here, when possible. + # However, I think we need a little more to avoid it symlinking + # to this through an NFS mount. 
+ curr_file.PFN(curr_pfn, site='local') + # Store the file to avoid later duplication + tuple_val = (local_file_path, curr_file, curr_pfn) + file_input_from_config_dict[curr_lfn] = tuple_val + return curr_file + def get_full_analysis_chunk(science_segs): """ Function to find the first and last time point contained in the science segments diff --git a/pycbc/workflow/grb_utils.py b/pycbc/workflow/grb_utils.py index 4fc796897c3..1f2ea260e1b 100644 --- a/pycbc/workflow/grb_utils.py +++ b/pycbc/workflow/grb_utils.py @@ -36,7 +36,7 @@ from six.moves.urllib.parse import urljoin from ligo import segments from glue.ligolw import ligolw, lsctables, utils, ilwd -from pycbc.workflow.core import File, FileList, resolve_url +from pycbc.workflow.core import File, FileList, resolve_url_to_file from pycbc.workflow.jobsetup import select_generic_executable @@ -224,11 +224,13 @@ def get_ipn_sky_files(workflow, file_url, tags=None): File object representing the IPN sky points file. ''' tags = tags or [] - ipn_sky_points = resolve_url(file_url) - sky_points_url = urljoin("file:", pathname2url(ipn_sky_points)) - sky_points_file = File(workflow.ifos, "IPN_SKY_POINTS", - workflow.analysis_time, file_url=sky_points_url, tags=tags) - sky_points_file.PFN(sky_points_url, site="local") + file_attrs = { + 'ifos': workflow.ifos, + 'segs': workflow.analysis_time, + 'exe_name': "IPN_SKY_POINTS", + 'tags': tags + } + sky_points_file = resolve_url_to_file(file_url, attrs=file_attrs) return sky_points_file diff --git a/pycbc/workflow/injection.py b/pycbc/workflow/injection.py index 69b9a705eaf..2eb61fb4339 100644 --- a/pycbc/workflow/injection.py +++ b/pycbc/workflow/injection.py @@ -30,9 +30,8 @@ """ import logging -from six.moves.urllib.request import pathname2url -from six.moves.urllib.parse import urljoin -from pycbc.workflow.core import File, FileList, make_analysis_dir, Executable, resolve_url +from pycbc.workflow.core import FileList, make_analysis_dir +from pycbc.workflow.core import Executable, resolve_url_to_file from pycbc.workflow.jobsetup import (LalappsInspinjExecutable, LigolwCBCJitterSkylocExecutable, LigolwCBCAlignTotalSpinExecutable, PycbcDarkVsBrightInjectionsExecutable) @@ -147,14 +146,18 @@ def setup_injection_workflow(workflow, output_dir=None, inj_file = node.output_files[0] inj_files.append(inj_file) elif injection_method == "PREGENERATED": - injectionFilePath = workflow.cp.get_opt_tags("workflow-injections", - "injections-pregenerated-file", curr_tags) - injectionFilePath = resolve_url(injectionFilePath) - file_url = urljoin('file:', pathname2url(injectionFilePath)) - inj_file = File('HL', 'PREGEN_inj_file', full_segment, file_url, - tags=curr_tags) - inj_file.PFN(injectionFilePath, site='local') - inj_files.append(inj_file) + file_attrs = { + 'ifos': ['HL'], + 'segs': full_segment, + 'tags': curr_tags + } + injection_path = workflow.cp.get_opt_tags( + "workflow-injections", + "injections-pregenerated-file", + curr_tags + ) + curr_file = resolve_url_to_file(injection_path, attrs=file_attrs) + inj_files.append(curr_file) elif injection_method in ["IN_COH_PTF_WORKFLOW", "AT_COH_PTF_RUNTIME"]: inj_job = LalappsInspinjExecutable(workflow.cp, inj_section_name, out_dir=output_dir, ifos=ifos, diff --git a/pycbc/workflow/psdfiles.py b/pycbc/workflow/psdfiles.py index c569809109b..fe6c4b6b99f 100644 --- a/pycbc/workflow/psdfiles.py +++ b/pycbc/workflow/psdfiles.py @@ -36,7 +36,8 @@ from six.moves import configparser as ConfigParser from six.moves.urllib.request import pathname2url from six.moves.urllib.parse 
import urljoin -from pycbc.workflow.core import File, FileList, make_analysis_dir, resolve_url +from pycbc.workflow.core import File, FileList +from pycbc.workflow.core import make_analysis_dir, resolve_url_to_file def setup_psd_workflow(workflow, science_segs, datafind_outs, output_dir=None, tags=None): @@ -117,17 +118,14 @@ def setup_psd_pregenerated(workflow, tags=None): cp = workflow.cp global_seg = workflow.analysis_time - user_tag = "PREGEN_PSD" + file_attrs = {'segs': global_seg, 'tags': tags} # Check for one psd for all ifos try: pre_gen_file = cp.get_opt_tags('workflow-psd', 'psd-pregenerated-file', tags) - pre_gen_file = resolve_url(pre_gen_file) - file_url = urljoin('file:', pathname2url(pre_gen_file)) - curr_file = File(workflow.ifos, user_tag, global_seg, file_url, - tags=tags) - curr_file.PFN(file_url, site='local') + file_attrs['ifos'] = workflow.ifos + curr_file = resolve_url_to_file(pre_gen_file, attrs=file_attrs) psd_files.append(curr_file) except ConfigParser.Error: # Check for one psd per ifo @@ -136,11 +134,8 @@ def setup_psd_pregenerated(workflow, tags=None): pre_gen_file = cp.get_opt_tags('workflow-psd', 'psd-pregenerated-file-%s' % ifo.lower(), tags) - pre_gen_file = resolve_url(pre_gen_file) - file_url = urljoin('file:', pathname2url(pre_gen_file)) - curr_file = File(ifo, user_tag, global_seg, file_url, - tags=tags) - curr_file.PFN(file_url, site='local') + file_attrs['ifos'] = [ifo] + curr_file = resolve_url_to_file(pre_gen_file, attrs=file_attrs) psd_files.append(curr_file) except ConfigParser.Error: diff --git a/pycbc/workflow/tmpltbank.py b/pycbc/workflow/tmpltbank.py index 9310838e1e3..9322f529f74 100644 --- a/pycbc/workflow/tmpltbank.py +++ b/pycbc/workflow/tmpltbank.py @@ -33,10 +33,9 @@ import os import logging from six.moves import configparser as ConfigParser -from six.moves.urllib.request import pathname2url -from six.moves.urllib.parse import urljoin import pycbc -from pycbc.workflow.core import File, FileList, make_analysis_dir, resolve_url +from pycbc.workflow.core import FileList +from pycbc.workflow.core import make_analysis_dir, resolve_url_to_file from pycbc.workflow.jobsetup import select_tmpltbank_class, select_matchedfilter_class, sngl_ifo_job_setup def setup_tmpltbank_workflow(workflow, science_segs, datafind_outs, @@ -361,16 +360,14 @@ def setup_tmpltbank_pregenerated(workflow, tags=None): cp = workflow.cp global_seg = workflow.analysis_time - user_tag = "PREGEN_TMPLTBANK" + file_attrs = {'segs' : global_seg, 'tags' : tags} + try: # First check if we have a bank for all ifos pre_gen_bank = cp.get_opt_tags('workflow-tmpltbank', 'tmpltbank-pregenerated-bank', tags) - pre_gen_bank = resolve_url(pre_gen_bank) - file_url = urljoin('file:', pathname2url(pre_gen_bank)) - curr_file = File(workflow.ifos, user_tag, global_seg, file_url, - tags=tags) - curr_file.PFN(file_url, site='local') + file_attrs['ifos'] = workflow.ifos + curr_file = resolve_url_to_file(pre_gen_bank, attrs=file_attrs) tmplt_banks.append(curr_file) except ConfigParser.Error: # Okay then I must have banks for each ifo @@ -379,11 +376,8 @@ def setup_tmpltbank_pregenerated(workflow, tags=None): pre_gen_bank = cp.get_opt_tags('workflow-tmpltbank', 'tmpltbank-pregenerated-bank-%s' % ifo.lower(), tags) - pre_gen_bank = resolve_url(pre_gen_bank) - file_url = urljoin('file:', pathname2url(pre_gen_bank)) - curr_file = File(ifo, user_tag, global_seg, file_url, - tags=tags) - curr_file.PFN(file_url, site='local') + file_attrs['ifos'] = [ifo] + curr_file = resolve_url_to_file(pre_gen_bank, 
attrs=file_attrs) tmplt_banks.append(curr_file) except ConfigParser.Error:
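Elsewhere in the workflow modules the same pattern repeats: each pregenerated input reduces to a single call. A minimal hedged sketch of using the resolve_url_to_file function added in this patch (the URL, ifo list, and tag are made up)::

    from ligo import segments
    from pycbc.workflow.core import resolve_url_to_file

    file_attrs = {
        'ifos': ['H1'],
        'exe_name': 'PREGEN_TMPLTBANK',
        'segs': segments.segment(1, 2000000000),
        'tags': ['FULL_DATA'],
    }
    # resolves the (hypothetical) URL to a local path and returns a workflow File
    # with local (and, for CVMFS paths, osg/nonfsio) PFNs registered
    bank = resolve_url_to_file('file:///cvmfs/example.org/banks/H1-BANK.hdf',
                               attrs=file_attrs)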