Merge pull request #8 from gwastro/master

Update branch

maxtrevor authored Jul 14, 2020
2 parents 08763b2 + 9177491 commit 88bdfff
Showing 89 changed files with 5,580 additions and 1,461 deletions.
14 changes: 9 additions & 5 deletions bin/hdfcoinc/pycbc_calculate_psd
@@ -1,12 +1,13 @@
 #!/usr/bin/env python
 """ Calculate psd estimates for analysis segments
 """
-import logging, argparse, numpy, h5py, multiprocessing, time
+import logging, argparse, numpy, h5py, multiprocessing, time, copy
 from six.moves import (range, zip_longest)
 import pycbc, pycbc.psd, pycbc.strain, pycbc.events
 from pycbc.version import git_verbose_msg as version
 from pycbc.fft.fftw import set_measure_level
-from ligo.segments import segmentlist
+from pycbc.workflow import resolve_td_option
+from ligo.segments import segmentlist, segment
 set_measure_level(0)
 
 parser = argparse.ArgumentParser(description=__doc__)
@@ -43,13 +44,16 @@ def get_psd(input_tuple):
 
     logging.info('%d: getting strain for %.1f-%.1f (%.1f s)', i, seg[0],
                  seg[1], abs(seg))
-    args.gps_start_time = int(seg[0]) + args.pad_data
-    args.gps_end_time = int(seg[1]) - args.pad_data
+    argstmp = copy.deepcopy(args)
+    argstmp.gps_start_time = int(seg[0]) + args.pad_data
+    argstmp.gps_end_time = int(seg[1]) - args.pad_data
+    tmp_segment = segment([argstmp.gps_start_time, argstmp.gps_end_time])
+    argstmp.channel_name = resolve_td_option(args.channel_name, tmp_segment)
 
     # This helps when the filesystem is unreliable, and gives extra retries.
     # python has an internal limit of ~100 (it is not infinite)
     try:
-        gwstrain = pycbc.strain.from_cli(args, pycbc.DYN_RANGE_FAC)
+        gwstrain = pycbc.strain.from_cli(argstmp, pycbc.DYN_RANGE_FAC)
     except RuntimeError:
         time.sleep(10)
         return get_psd((seg, i))
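The fix above stops the worker from mutating the shared argparse namespace: each segment now works on a deep copy, so concurrent calls cannot clobber each other's GPS bounds. A minimal sketch of that pattern, with illustrative names rather than the actual PyCBC objects:

    import argparse
    import copy

    args = argparse.Namespace(pad_data=8, gps_start_time=None, gps_end_time=None)

    def options_for_segment(args, seg_start, seg_end):
        # Deep-copy so per-segment edits never leak back into the shared namespace.
        argstmp = copy.deepcopy(args)
        argstmp.gps_start_time = int(seg_start) + args.pad_data
        argstmp.gps_end_time = int(seg_end) - args.pad_data
        return argstmp

    opts = options_for_segment(args, 1126259446.0, 1126259478.0)
    print(opts.gps_start_time, opts.gps_end_time)  # 1126259454 1126259470
    assert args.gps_start_time is None             # shared options untouched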
21 changes: 16 additions & 5 deletions bin/hdfcoinc/pycbc_coinc_hdfinjfind
@@ -222,8 +222,15 @@ for trigger_file, injection_file in zip(args.trigger_files,
     # pick up optimal SNRs
     if multi_ifo_style:
         for ifo, column in args.optimal_snr_column.items():
+            optimal_snr_all = numpy.array(sim_table.get_column(column))
+            # As a single detector being vetoed won't veto all combinations,
+            # need to set optimal_snr of a vetoed ifo to zero in order
+            # to later calculate decisive optimal snr
+            iws, _ = indices_within_segments(inj_time, [args.veto_file], ifo=ifo,
+                                             segment_name=args.segment_name)
+            optimal_snr_all[iws] = 0
             hdf_append(fo, 'injections/optimal_snr_%s' % ifo,
-                       sim_table.get_column(column))
+                       optimal_snr_all)
     else:
         ifo_map = {f.attrs['detector_1']: 1,
                    f.attrs['detector_2']: 2}
@@ -248,10 +255,14 @@ for trigger_file, injection_file in zip(args.trigger_files,
             continue
         if key not in fo:
             fo.create_group(key)
-        fo[key].attrs['pivot'] = f[key].attrs['pivot']
-        fo[key].attrs['fixed'] = f[key].attrs['fixed']
-        fo[key].attrs['foreground_time'] = f[key].attrs['foreground_time']
-        fo[key].attrs['foreground_time_exc'] = f[key].attrs['foreground_time_exc']
+        if key in f:
+            fkey = f[key]
+        else:
+            fkey = f
+        fo[key].attrs['pivot'] = fkey.attrs['pivot']
+        fo[key].attrs['fixed'] = fkey.attrs['fixed']
+        fo[key].attrs['foreground_time'] = fkey.attrs['foreground_time']
+        fo[key].attrs['foreground_time_exc'] = fkey.attrs['foreground_time_exc']
 else:
     fo.attrs['detector_1'] = f.attrs['detector_1']
     fo.attrs['detector_2'] = f.attrs['detector_2']
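The first hunk zeroes the optimal SNR of any injection that falls in a vetoed segment for that detector, so a vetoed ifo cannot contribute to the decisive optimal SNR. A sketch of just the masking step, with a hypothetical index array standing in for the `iws` output of indices_within_segments:

    import numpy

    optimal_snr_all = numpy.array([12.3, 8.7, 25.1, 4.2])
    iws = numpy.array([1, 3])  # hypothetical indices of vetoed injections

    optimal_snr_all[iws] = 0   # a vetoed ifo contributes nothing
    print(optimal_snr_all)     # [12.3  0.  25.1  0. ]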
4 changes: 2 additions & 2 deletions bin/hdfcoinc/pycbc_multiifo_coinc_statmap_inj
@@ -101,8 +101,8 @@ if len(zdata) > 0:
     f['foreground/fap_exc'] = fap_exc
 
     logging.info('calculating injection backgrounds')
-    ifotimes = zip(zdata.data['%s/time' % ifo] for ifo in args.ifos)
-    ftimes = numpy.concatenate(ifotimes).mean(axis=0)
+    ifotimes = numpy.array([zdata.data['%s/time' % ifo] for ifo in args.ifos])
+    ftimes = ifotimes.mean(axis=0)
     start, end = ftimes - args.veto_window, ftimes + args.veto_window
 
     fnlouder = numpy.zeros(len(ftimes), dtype=numpy.float32)
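The old code zipped a single generator, which yields 1-tuples rather than the intended (n_ifos, n_coincs) layout, so the mean over axis 0 was not a per-coincidence mean time. The fix stacks the per-detector time arrays into a 2-D array and averages over detectors. A small sketch with made-up times:

    import numpy

    times = {'H1': numpy.array([100.00, 200.00, 300.00]),
             'L1': numpy.array([100.01, 200.02, 299.99])}
    ifos = ['H1', 'L1']

    # shape (n_ifos, n_coincs); axis 0 runs over detectors
    ifotimes = numpy.array([times[ifo] for ifo in ifos])
    ftimes = ifotimes.mean(axis=0)  # one mean time per coincidence
    print(ftimes)  # [100.005 200.01  299.995]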
15 changes: 9 additions & 6 deletions bin/inference/pycbc_inference
@@ -27,7 +27,7 @@ import numpy
 
 import pycbc
 from pycbc import (distributions, transforms, fft,
-                   opt, scheme)
+                   opt, scheme, pool)
 from pycbc.waveform import generator
 
 from pycbc import __version__
@@ -80,9 +80,8 @@ opts = parser.parse_args()
 
 # setup log
 # If we're running in MPI mode, only allow the parent to print
-if opts.use_mpi:
-    from mpi4py import MPI
-    rank = MPI.COMM_WORLD.Get_rank()
+use_mpi, size, rank = pycbc.pool.use_mpi(opts.use_mpi, log=False)
+if use_mpi:
     opts.verbose &= rank == 0
 pycbc.init_logging(opts.verbose)
 
@@ -122,7 +121,7 @@ with ctx:
                                          cp.get('sampler', 'checkpoint-signal')))
         # create an empty output file to keep condor happy
         open(opts.output_file, 'a').close()
-
+
     logging.info("Setting up model")
 
     # construct class that will return the natural logarithm of likelihood
@@ -141,7 +140,7 @@ with ctx:
     # Run the sampler
     sampler.run()
 
-    # Finalize the output 
+    # Finalize the output
     sampler.finalize()
 
 if condor_ckpt:
@@ -158,6 +157,10 @@ if not opts.save_backup:
     logging.info("Deleting backup file")
     os.remove(sampler.backup_file)
 
+# write the end time
+with sampler.io(opts.output_file, 'a') as fp:
+    fp.write_run_end_time()
+
 if condor_ckpt:
     # create an empty checkpoint file
     open(sampler.checkpoint_file, 'a').close()
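The inline MPI block is replaced by pycbc.pool.use_mpi(), but the logging behaviour is the same: only the parent rank stays verbose, so a multi-process run does not print duplicate log lines. A sketch of that rank-gating using mpi4py directly, rather than the PyCBC helper:

    import logging
    from mpi4py import MPI

    rank = MPI.COMM_WORLD.Get_rank()
    verbose = True
    verbose &= rank == 0  # silence every rank except the parent

    logging.basicConfig(level=logging.INFO if verbose else logging.WARNING)
    logging.info("only rank 0 prints this")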
7 changes: 2 additions & 5 deletions bin/inference/pycbc_inference_plot_acl
@@ -78,14 +78,11 @@ for param_name in parameters:
     logging.info("Plotting autocorrelation times")
     plt.hist(acls, opts.bins, histtype="step", label=labels[param_name])
 
-# get the file's acl
-fpacl = fp.thinned_by * fp[fp.sampler_group].attrs['acl']
-
 plt.xlabel("Autocorrelation time")
 plt.ylabel(r'Number of walkers')
 
-# plot autocorrelation length saved in hdf file
-plt.axvline(fpacl, linestyle='--')
+# plot autocorrelation time saved in hdf file
+plt.axvline(fp.act, linestyle='--')
 plt.legend()
 
 # save figure with meta-data
58 changes: 33 additions & 25 deletions bin/inference/pycbc_inference_plot_samples
@@ -32,15 +32,16 @@ from pycbc.inference import (option_utils, io)
 import sys
 
 # command line usage
-parser = argparse.parser = io.ResultsArgumentParser(skip_args=['walkers'])
+parser = argparse.parser = io.ResultsArgumentParser(
+    skip_args=['chains', 'iteration'])
 parser.add_argument("--verbose", action="store_true", default=False,
                     help="Print logging info.")
 parser.add_argument("--version", action="version", version=__version__,
                     help="show version number and exit")
-parser.add_argument("--walkers", nargs='+', default=None,
-                    help="Walker indices to plot. Options are 'all' or one "
-                         "or more walker indices. Default is to plot the "
-                         "average of all walkers for the input "
+parser.add_argument("--chains", nargs='+', default=None,
+                    help="Chain/walker indices to plot. Options are 'all' or "
+                         "one or more chain indices. Default is to plot the "
+                         "average of all chains for the input "
                          "`--parameters`.")
 parser.add_argument("--output-file", type=str, required=True,
                     help="Path to output plot.")
@@ -57,11 +58,11 @@ fp, parameters, labels, _ = io.results_from_cli(opts, load_samples=False)
 # get number of dimensions
 ndim = len(parameters)
 
-# get walker indices
-if opts.walkers == ['all'] or opts.walkers == None:
-    walkers = range(fp.nwalkers)
+# get chain indices
+if opts.chains == ['all'] or opts.chains == None:
+    chains = range(fp.nchains)
 else:
-    walkers = list(map(int, opts.walkers))
+    chains = list(map(int, opts.chains))
 
 # plot samples
 # plot each parameter as a different subplot
@@ -91,29 +92,36 @@ else:
     thinned_by = fp.thinned_by*xint
     xmin = xmin*fp.thinned_by
 
+# create the kwargs to load samples
+kwargs = {'thin_start': opts.thin_start,
+          'thin_interval': opts.thin_interval,
+          'thin_end': opts.thin_end}
 # add the temperature args if it exists
-additional_args = {}
 try:
-    additional_args['temps'] = opts.temps
+    kwargs['temps'] = opts.temps
 except AttributeError:
     pass
 
 for i, arg in enumerate(parameters):
     chains_arg = []
-    for widx in walkers:
-        chain = fp.read_samples(parameters, walkers=widx,
-                                thin_start=opts.thin_start,
-                                thin_interval=opts.thin_interval,
-                                thin_end=opts.thin_end, **additional_args)
+    for cidx in chains:
+        kwargs['chains'] = cidx
+        try:
+            chain = fp.read_samples(parameters, **kwargs)
+        except TypeError:
+            # will get this if ensemble sampler; change "chains" to "walkers"
+            kwargs['walkers'] = kwargs.pop('chains')
+            chain = fp.read_samples(parameters, **kwargs)
         chains_arg.append(chain[arg])
-    if opts.walkers is not None:
+    if opts.chains is not None:
         for chain in chains_arg:
-            # plot each walker as a different line on the subplot
-            axs[i].plot((numpy.arange(len(chain)))*thinned_by + xmin, chain, alpha=0.6)
+            # plot each chain as a different line on the subplot
+            axs[i].plot((numpy.arange(len(chain)))*thinned_by + xmin, chain,
+                        alpha=0.6)
     else:
-        # plot the average of all walkers for the parameter on the subplot
+        # plot the average of all chains for the parameter on the subplot
         chains_arg = numpy.array(chains_arg)
-        avg_chain = [chains_arg[:, j].sum()/fp.nwalkers
+        avg_chain = [chains_arg[:, j].sum()/fp.nchains
                      for j in range(len(chains_arg[0]))]
         axs[i].plot((numpy.arange(len(avg_chain)))*thinned_by + xmin, avg_chain)
     # Set y labels
@@ -124,10 +132,10 @@ fp.close()
 caption_kwargs = {
     "parameters" : ", ".join(sorted(list(labels.values()))),
 }
-caption = r"""Parameter samples from the walker chains whose indices were
-provided as inputs. Each line is a different chain of walker samples in that
-case. If no walker indices were provided, the plot shows the variation of the
-parameter sample values averaged over all walkers."""
+caption = r"""Parameter samples from the chains whose indices were
+provided as inputs. Each line is a different chain of samples in that
+case. If no chain indices were provided, the plot shows the variation of the
+parameter sample values averaged over all chains."""
 title = "Samples for {parameters}".format(**caption_kwargs)
 results.save_fig_with_metadata(fig, opts.output_file,
                                cmd=" ".join(sys.argv),
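The read loop now tries the new 'chains' keyword first and falls back to 'walkers' when the file was written by an ensemble sampler. A sketch of that keyword-fallback pattern, with a hypothetical reader standing in for fp.read_samples:

    def read_samples(parameters, walkers=None):  # old-style interface
        return {'x': [0.1, 0.2]}                 # stand-in for an HDF read

    kwargs = {'chains': 0}
    try:
        chain = read_samples(['x'], **kwargs)
    except TypeError:
        # ensemble-sampler files spell the argument "walkers"
        kwargs['walkers'] = kwargs.pop('chains')
        chain = read_samples(['x'], **kwargs)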
4 changes: 3 additions & 1 deletion bin/plotting/pycbc_ifar_catalog
@@ -131,7 +131,9 @@ if h_inc_back_num >= 0 and h_iterations is not None and h_iterations != 0:
     h_rm_ifar = hrm_sorted[idx_start:]
     h_rm_cumnum = numpy.arange(len(h_rm_ifar), 0, -1)
 else:
-    fore_ifar = f['foreground/' + ifar_str][:]
+    fore_ifar = numpy.array([])
+    for f in trigf:
+        fore_ifar = numpy.append(fore_ifar, f['foreground/' + ifar_str][:])
 
 if opts.remove_threshold is not None and opts.truncate_threshold is not None:
     raise RuntimeError("Can't both remove and truncate foreground events!")
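The else branch now gathers the foreground IFAR column from every open trigger file rather than from a single file. A sketch of the gather, with illustrative file names and dataset layout:

    import numpy
    import h5py

    fore_ifar = numpy.array([])
    for fname in ['H1L1-STATMAP.hdf', 'H1L1V1-STATMAP.hdf']:  # hypothetical files
        with h5py.File(fname, 'r') as f:
            fore_ifar = numpy.append(fore_ifar, f['foreground/ifar'][:])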
13 changes: 11 additions & 2 deletions bin/pycbc_live
@@ -562,6 +562,9 @@ parser.add_argument('--store-psd', action='store_true')
 parser.add_argument('--output-background', type=str, nargs='+',
                     help='Takes a period in seconds and a file path and dumps '
                          'the coinc backgrounds to that path with that period')
+parser.add_argument('--output-background-n-loudest', type=int, default=0,
+                    help="If given an integer (assumed positive), store the "
+                         "loudest n triggers (not sorted) for each coinc background")
 
 parser.add_argument('--newsnr-threshold', type=float, default=0)
 parser.add_argument('--max-batch-size', type=int, default=2**27)
@@ -872,8 +875,14 @@ with ctx:
             bg_fn = os.path.join(args.output_background[1], bg_fn)
             with h5py.File(bg_fn, 'w') as bgf:
                 for bg_ifos, bg_data, bg_time in bg_dists:
-                    ds = bgf.create_dataset(','.join(sorted(bg_ifos)),
-                                            data=bg_data, compression='gzip')
+                    if args.output_background_n_loudest and (args.output_background_n_loudest < len(bg_data)-1):
+                        n_loudest = args.output_background_n_loudest
+                        assert (n_loudest > 0), "We can only store positive int loudest triggers."
+                        ds = bgf.create_dataset(','.join(sorted(bg_ifos)),
+                            data=-numpy.partition(-bg_data, n_loudest)[:n_loudest], compression='gzip')
+                    else:
+                        ds = bgf.create_dataset(','.join(sorted(bg_ifos)),
+                                                data=bg_data, compression='gzip')
                     ds.attrs['background_time'] = bg_time
                 bgf.attrs['gps_time'] = last_bg_dump_time
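The n-loudest selection uses numpy.partition on the negated array: the n largest values land (unsorted) in the first n slots, which avoids a full sort of a potentially large background. For example:

    import numpy

    bg_data = numpy.array([3.1, 9.4, 1.2, 7.7, 5.0, 8.8])
    n_loudest = 3

    # partition puts the n smallest of -bg_data first, i.e. the n largest of bg_data
    loudest = -numpy.partition(-bg_data, n_loudest)[:n_loudest]
    print(loudest)  # 9.4, 8.8 and 7.7, in no particular order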
2 changes: 1 addition & 1 deletion bin/workflows/pycbc_create_offline_search_workflow
@@ -224,7 +224,7 @@ else:
         # Create coinc tag
         coinctag = '{}det'.format(len(ifocomb))
         ctagcomb = ['full_data', coinctag]
-        other_ifo_keys = no_fg_exc_files.keys()
+        other_ifo_keys = list(no_fg_exc_files.keys())
         other_ifo_keys.remove(ordered_ifo_list)
         other_bg_files = {ctype: no_fg_exc_files[ctype]
                           for ctype in other_ifo_keys}
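This one-line change is a Python 3 fix: dict.keys() now returns a view, which has no .remove() method, so the old code raised AttributeError. Wrapping it in list() restores the Python 2 behaviour (dictionary contents here are illustrative):

    no_fg_exc_files = {'H1L1': 'a.hdf', 'H1L1V1': 'b.hdf'}

    other_ifo_keys = list(no_fg_exc_files.keys())
    other_ifo_keys.remove('H1L1V1')  # a bare dict view would raise AttributeError here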
43 changes: 32 additions & 11 deletions docs/index.rst
@@ -72,16 +72,28 @@ your own build.
 
     pip install lalsuite pycbc
 
-Full detailed installation instructions for users who want to use and develop PyCBC are available at:
+Full detailed installation instructions covering other installation cases:
 
 .. toctree::
    :maxdepth: 1
 
    install
 
-=======================
-Documentation for Users
-=======================
+====================================================
+Parameter Estimation of Gravitational-wave Sources
+====================================================
+
+Users who want to create and run parameter estimation workflows should read the
+documentation at:
+
+.. toctree::
+   :maxdepth: 2
+
+   inference
+
+==========================================
+Searching for Gravitational-wave Signals
+==========================================
 
 Users who want to create and run scientific workflows to search for compact
 binaries should read the documentation in the links at:
@@ -93,13 +105,9 @@ binaries should read the documentation in the links at:
    workflow/pycbc_make_coinc_search_workflow
    workflow/pygrb.rst
 
-Users who want to create and run parameter estimation workflows should read the
-documentation at:
-
-.. toctree::
-   :maxdepth: 2
-
-   inference
+===================================================
+Template Banks, Hardware Injections, and more...
+===================================================
 
 Users who are interested in tools that PyCBC provides for various other
 analysis tasks (e.g. template bank generation, hardware injections, and testing
 template banks) should read the documentation at:
 
    faithsim
   upload_to_gracedb
 
+==========================================
+Extending PyCBC with external plugins
+==========================================
+
+Would you like to use a waveform model that PyCBC doesn't have? Or maybe you
+have your own waveform you'd like to use for a search, parameter estimation,
+etc. PyCBC supports a plug-in architecture for external waveform models.
+
+.. toctree::
+   :maxdepth: 1
+
+   waveform_plugin
+
 ==========================================
 Library Examples and Interactive Tutorials
 ==========================================
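The new section advertises the waveform plug-in architecture. As a rough sketch (the entry-point group name and module layout here are assumptions; the waveform_plugin page is the authoritative reference), an external package might register a time-domain model like this:

    # setup.py of a hypothetical plugin package
    from setuptools import setup

    setup(
        name='pycbc-mymodel-plugin',
        version='0.1',
        py_modules=['mymodel'],
        entry_points={
            # assumed group name; PyCBC scans these at import time
            'pycbc.waveform.td': 'mymodel = mymodel:generate_td',
        },
    )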
5 changes: 5 additions & 0 deletions docs/inference.rst
@@ -250,6 +250,11 @@ will remain fixed throughout the run. For example:
     approximant = IMRPhenomPv2
     f_lower = 18
 
+In the example above, we choose the waveform model 'IMRPhenomPv2'. PyCBC comes
+with access to waveforms provided by the lalsimulation package. If you'd like
+to use a custom waveform outside of what PyCBC currently supports, see the
+:ref:`documentation on creating a plugin for PyCBC <waveform_plugin>`.
+
 ^^^^^^^^^^^^
 Setting data
 ^^^^^^^^^^^^
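The paragraph added here names IMRPhenomPv2 as the waveform model; for reference, the same model can be generated directly with PyCBC's waveform interface (parameter values below are arbitrary examples):

    from pycbc.waveform import get_td_waveform

    hp, hc = get_td_waveform(approximant='IMRPhenomPv2',
                             mass1=36, mass2=29,
                             f_lower=18, delta_t=1.0/2048)
    print(len(hp), hp.delta_t)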