Add calculation of source type probabilities to PyCBC Live (gwastro#3077)

* Introduce source probability calculation
* Mass gap probabilities improved
* Change save function to add probs file
* Change in z_delta and mchirp_delta
* Add argument for mchirp_delta coefficient
* Add mchirp bbh condition
* Change save function
* Add CBC probabilities to upload method
* Small corrections to mchirp_area.py
* Add logging messages
* Fix small errors
* Fix small syntax error
* Fix codeclimate issues
* Fix codeclimate issues
* Fix codeclimate issues
* Fix Travis issue
* Changes in pie plot code
* Remove unnecessary serialization to JSON file
* Move distance and redshift estimations to mchirp_area module
* Fix small errors
* Fix codeclimate issue
* Add color codes for the different CBC sources
* Change option names and help strings
* Assign colors to CBC sources and tag JSON file and plot as EM followup
* Fix codeclimate issues
* Change mc_area_args to be an attribute of LiveEventManager
lenona · Sep 14, 2020 · 4b7db91 · 4b7db91
1 parent a9d560f
commit 4b7db91
Show file tree

Hide file tree

Showing 4 changed files with 190 additions and 26 deletions.
diff --git a/bin/pycbc_live b/bin/pycbc_live
@@ -31,6 +31,7 @@ from pycbc.io.live import SingleCoincForGraceDB
 import pycbc.waveform.bank
 from pycbc.vetoes.sgchisq import SingleDetSGChisq
 from pycbc.waveform.waveform import props
+from pycbc import mchirp_area
 
 
 def ppdets(ifos):
@@ -63,14 +64,15 @@ def combine_ifar_pvalue(ifar, pvalue, livetime):
 
 
 class LiveEventManager(object):
-    def __init__(self, output_path,
+    def __init__(self, output_path, mc_area_args,
                  use_date_prefix=False,
                  ifar_upload_threshold=None,
                  pval_livetime=None,
                  enable_gracedb_upload=False,
                  gracedb_testing=True,
                  run_snr_optimization=False):
         self.path = output_path
+        self.mc_area_args = mc_area_args
 
         # Figure out what we are supposed to process within the pool of MPI processes
         self.comm = mpi.COMM_WORLD
@@ -241,7 +243,8 @@ class LiveEventManager(object):
             event = SingleCoincForGraceDB(live_ifos, coinc_results, bank=bank,
                                           psds=psds, followup_data=fud,
                                           low_frequency_cutoff=f_low,
-                                          channel_names=args.channel_name)
+                                          channel_names=args.channel_name,
+                                          mc_area_args=self.mc_area_args)
 
             end_time = int(coinc_results['foreground/%s/end_time'
                                          % coinc_ifos[0]])
@@ -606,6 +609,7 @@ LiveSingle.insert_args(parser)
 fft.insert_fft_option_group(parser)
 Coincer.insert_args(parser)
 SingleDetSGChisq.insert_option_group(parser)
+mchirp_area.insert_args(parser)
 args = parser.parse_args()
 scheme.verify_processing_options(args, parser)
 fft.verify_fft_options(args, parser)
@@ -641,7 +645,8 @@ evnt = LiveEventManager(args.output_path,
                         pval_livetime=args.pvalue_combination_livetime,
                         enable_gracedb_upload=args.enable_gracedb_upload,
                         gracedb_testing=not args.enable_production_gracedb_upload,
-                        run_snr_optimization=args.run_snr_optimization)
+                        run_snr_optimization=args.run_snr_optimization,
+                        mc_area_args=mchirp_area.from_cli(args))
 
 sg_chisq = SingleDetSGChisq.from_cli(args, bank, args.chisq_bins)
 

diff --git a/pycbc/io/live.py b/pycbc/io/live.py
@@ -3,6 +3,7 @@
 import pycbc
 import numpy
 import lal
+import json
 from six import u as unicode
 from glue.ligolw import ligolw
 from glue.ligolw import lsctables
@@ -13,7 +14,8 @@
 from pycbc import pnutils
 from pycbc.tmpltbank import return_empty_sngl
 from pycbc.results import ifo_color
-
+from pycbc.results import source_color
+from pycbc.mchirp_area import calc_probabilities
 
 #FIXME Legacy build PSD xml helpers, delete me when we move away entirely from
 # xml formats
@@ -104,6 +106,9 @@ def __init__(self, ifos, coinc_results, **kwargs):
         channel_names: dict of strings, optional
             Strain channel names for each detector.
             Will be recorded in the sngl_inspiral table.
+        mc_area_args: dict of dicts, optional
+            Dictionary providing arguments to be used in source probability
+            estimation with pycbc/mchirp_area.py
         """
         self.template_id = coinc_results['foreground/%s/template_id' % ifos[0]]
         self.coinc_results = coinc_results
@@ -251,6 +256,17 @@ def __init__(self, ifos, coinc_results, **kwargs):
             psds_lal[ifo] = fseries
         make_psd_xmldoc(psds_lal, outdoc)
 
+        # source probabilities estimation
+        if 'mc_area_args' in kwargs:
+            eff_distances = [sngl.eff_distance for sngl in sngl_inspiral_table]
+            probabilities = calc_probabilities(coinc_inspiral_row.mchirp,
+                                               coinc_inspiral_row.snr,
+                                               min(eff_distances),
+                                               kwargs['mc_area_args'])
+            self.probabilities = probabilities
+        else:
+            self.probabilities = None
+
         self.outdoc = outdoc
         self.time = sngl_populated.get_end()
 
@@ -265,6 +281,13 @@ def save(self, filename):
         gz = filename.endswith('.gz')
         ligolw_utils.write_filename(self.outdoc, filename, gz=gz)
 
+        # save source probabilities in a json file
+        if self.probabilities is not None:
+            prob_fname = filename.replace('.xml.gz', '_probs.json')
+            with open(prob_fname, 'w') as prob_outfile:
+                json.dump(self.probabilities, prob_outfile)
+            logging.info('Source probabilities file saved as %s', prob_fname)
+
     def upload(self, fname, gracedb_server=None, testing=True,
                extra_strings=None):
         """Upload this trigger to gracedb
@@ -332,6 +355,22 @@ def upload(self, fname, gracedb_server=None, testing=True,
             pylab.xlabel('Frequency (Hz)')
             pylab.ylabel('ASD')
             pylab.savefig(psd_series_plot_fname)
+            pylab.close()
+
+        if self.probabilities is not None:
+            prob_fname = fname.replace('.xml.gz', '_probs.json')
+            prob_plot_fname = prob_fname.replace('.json', '.png')
+
+            prob_plot = {k: v for (k, v) in self.probabilities.items()
+                         if v != 0.0}
+            labels, sizes = zip(*prob_plot.items())
+            colors = [source_color(label) for label in labels]
+            fig, ax = pylab.subplots()
+            ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%',
+                   textprops={'fontsize': 15})
+            ax.axis('equal')
+            fig.savefig(prob_plot_fname)
+            pylab.close()
 
         gid = None
         try:
@@ -378,6 +417,17 @@ def upload(self, fname, gracedb_server=None, testing=True,
                                  filename=psd_series_plot_fname,
                                  tag_name=['psd'], displayName=['PSDs'])
 
+            # upload source probabilities in json format and plot
+            if self.probabilities is not None:
+                gracedb.writeLog(gid, 'source probabilities JSON file upload',
+                                 filename=prob_fname, tag_name=['em_follow'])
+                logging.info('Uploaded source probabilities for event %s', gid)
+                gracedb.writeLog(gid, 'source probabilities plot upload',
+                                 filename=prob_plot_fname,
+                                 tag_name=['em_follow'])
+                logging.info('Uploaded source probabilities pie chart for '
+                             'event %s', gid)
+
         except Exception as exc:
             logging.error('Something failed during the upload/annotation of '
                           'event %s on GraceDB. The event may not have been '

diff --git a/pycbc/mchirp_area.py b/pycbc/mchirp_area.py
@@ -1,15 +1,72 @@
-# Integration of the area laying in the different cbc regions
-# By A. Curiel Barroso
-# August 2019
+# Module with utilities for estimating candidate events source probabilities
+# Initial code by A. Curiel Barroso, August 2019
+# Modified by V. Villa-Ortega, January 2020
 
 """Functions to compute the area corresponding to different CBC on the m1 & m2
 plane when given a central mchirp value and uncertainty.
 It also includes a function that calculates the source frame when given the
 detector frame mass and redshift.
 """
 
+import math
 from pycbc.conversions import mass2_from_mchirp_mass1 as m2mcm1
 from scipy.integrate import quad
+from pycbc.cosmology import _redshift
+
+
+def insert_args(parser):
+    """Add the source-classification option group to a command-line parser.
+
+    All options are prefixed with ``--src-class-`` and carry defaults, so
+    none of them is required on the command line. The parsed values are
+    turned into the settings dictionary by from_cli.
+
+    Parameters
+    ----------
+    parser : object
+        Parser providing ``add_argument_group`` (e.g. an
+        argparse.ArgumentParser). It is modified in place.
+    """
+    mchirp_group = parser.add_argument_group("Arguments for estimating the "
+                                             "source probabilities of a "
+                                             "candidate event using the snr, "
+                                             "mchirp, and effective distance.")
+    mchirp_group.add_argument('--src-class-mass-range', type=float, nargs=2,
+                              metavar=('MIN_M2', 'MAX_M1'),
+                              default=[1.0, 45.0],
+                              help="Minimum and maximum values for the mass "
+                                   "of the binary components, used as limits "
+                                   "of the mass plane when computing the area "
+                                   "corresponding to different CBC sources.")
+    mchirp_group.add_argument('--src-class-mass-gap', type=float, nargs=2,
+                              metavar=('MAX_NS', 'MIN_BH'), default=[3.0, 5.0],
+                              help="Limits of the mass gap, that correspond "
+                                   "to the maximum mass of a neutron star "
+                                   "and the minimum mass of a black hole. "
+                                   "Used as limits of integration of the "
+                                   "different CBC regions.")
+    mchirp_group.add_argument('--src-class-mchirp-to-delta', type=float,
+                              metavar='m0', default=0.01,
+                              help='Coefficient to estimate the value of the '
+                                   'mchirp uncertainty by mchirp_delta = '
+                                   'm0 * mchirp.')
+    mchirp_group.add_argument('--src-class-eff-to-lum-distance', type=float,
+                              metavar='a0', default=0.759,
+                              help='Coefficient to estimate the value of the '
+                                   'luminosity distance from the minimum '
+                                   'eff distance by D_lum = a0 * min(D_eff).')
+    mchirp_group.add_argument('--src-class-lum-distance-to-delta', type=float,
+                              nargs=2, metavar=('b0', 'b1'),
+                              default=[-0.449, -0.342],
+                              help='Coefficients to estimate the value of the '
+                                   'uncertainty on the luminosity distance '
+                                   'from the estimated luminosity distance and'
+                                   ' the coinc snr by delta_lum = D_lum * '
+                                   'exp(b0) * coinc_snr ** b1.')
+    # Flag only: when absent, the mass gap categories are reported together
+    mchirp_group.add_argument('--src-class-mass-gap-separate',
+                              action='store_true',
+                              help='Gives separate probabilities for each kind'
+                                   ' of mass gap CBC sources: GNS, GG, BHG.')
+
+
+def from_cli(args):
+    """Collect the source-classification options from parsed command-line
+    arguments into the nested dictionary read by calc_probabilities.
+
+    Parameters
+    ----------
+    args : namespace
+        Parsed options providing the ``src_class_*`` attributes defined by
+        insert_args.
+
+    Returns
+    -------
+    dict
+        Dictionary with the keys 'mass_limits', 'mass_bdary',
+        'estimation_coeff' and 'mass_gap'.
+    """
+    mass_range = args.src_class_mass_range
+    gap_limits = args.src_class_mass_gap
+    delta_coeffs = args.src_class_lum_distance_to_delta
+
+    settings = {}
+    # The mass range option is given as (MIN_M2, MAX_M1)
+    settings['mass_limits'] = {'max_m1': mass_range[1],
+                               'min_m2': mass_range[0]}
+    # The mass gap option is given as (MAX_NS, MIN_BH)
+    settings['mass_bdary'] = {'ns_max': gap_limits[0],
+                              'gap_max': gap_limits[1]}
+    settings['estimation_coeff'] = {
+        'a0': args.src_class_eff_to_lum_distance,
+        'b0': delta_coeffs[0],
+        'b1': delta_coeffs[1],
+        'm0': args.src_class_mchirp_to_delta,
+    }
+    settings['mass_gap'] = args.src_class_mass_gap_separate
+    return settings
 
 
 def src_mass_from_z_det_mass(z, del_z, mdet, del_mdet):
@@ -22,23 +79,15 @@ def src_mass_from_z_det_mass(z, del_z, mdet, del_mdet):
     return (msrc, del_msrc)
 
 
-# Integration function
-def mchange(x, mc):
-    """Returns a component mass as a function of mchirp and the other
-    component mass.
-    """
-    return m2mcm1(mc, x)
-
-
 def intmc(mc, x_min, x_max):
-    """Returns the integral of mchange between the minimum and maximum values
-    of a component mass taking mchirp as an argument.
+    """Returns the integral, over the mass of one component from x_min to
+       x_max, of the mass of the other component at fixed chirp mass mc.
+
+       Only the first element of the tuple returned by quad (the integral
+       estimate) is returned.
     """
-    integral = quad(mchange, x_min, x_max, args=mc)
+    # quad calls the integrand as f(x, mc) while m2mcm1 expects the chirp
+    # mass first, hence the argument swap in the lambda
+    integral = quad(lambda x, mc: m2mcm1(mc, x), x_min, x_max, args=mc)
     return integral[0]
 
 
-def calc_areas(trig_mc_det, mass_limits, mass_bdary, z):
+def calc_areas(trig_mc_det, mass_limits, mass_bdary, z, mass_gap):
     """Computes the area inside the lines of the second component mass as a
     function of the first component mass for the two extreme values
     of mchirp: mchirp +/- mchirp_uncertainty, for each region of the source
@@ -227,12 +276,58 @@ def calc_areas(trig_mc_det, mass_limits, mass_bdary, z):
         int_inf_nsbh = ints_nsbh + intline_inf_nsbh
 
         ansbh = int_sup_nsbh - int_inf_nsbh
-
+    if mass_gap:
+        return {
+            "BNS": abns,
+            "GNS": agns,
+            "NSBH": ansbh,
+            "GG": agg,
+            "BHG": abhg,
+            "BBH": abbh
+            }
     return {
-        "bns": abns,
-        "gns": agns,
-        "nsbh": ansbh,
-        "gg": agg,
-        "bhg": abhg,
-        "bbh": abbh
+        "BNS": abns,
+        "NSBH": ansbh,
+        "BBH": abbh,
+        "Mass Gap": agns + agg + abhg
         }
+
+
+def calc_probabilities(mchirp, snr, eff_distance, src_args):
+    """Computes the probability that a candidate event belongs to each CBC
+    source category.
+
+    The chirp mass uncertainty, the luminosity distance (and its
+    uncertainty) and the redshift (and its uncertainty) are estimated from
+    the inputs. Each probability is then taken to be directly proportional
+    to the area laying in the corresponding CBC region, as returned by
+    calc_areas.
+
+    Parameters
+    ----------
+    mchirp : float
+        Chirp mass of the candidate. It is compared against a source-frame
+        limit scaled by (1 + z), i.e. it is treated as a detector-frame
+        value.
+    snr : float
+        Coincident SNR of the candidate.
+    eff_distance : float
+        Effective distance used to estimate the luminosity distance.
+    src_args : dict
+        Settings with the keys 'mass_limits', 'mass_bdary',
+        'estimation_coeff' and 'mass_gap', as built by from_cli.
+
+    Returns
+    -------
+    dict
+        Probability for each source label. The labels are "BNS", "GNS",
+        "NSBH", "GG", "BHG" and "BBH" when src_args['mass_gap'] is true,
+        and "BNS", "NSBH", "BBH" and "Mass Gap" otherwise.
+    """
+    mass_limits = src_args['mass_limits']
+    mass_bdary = src_args['mass_bdary']
+    coeff = src_args['estimation_coeff']
+    mass_gap = src_args['mass_gap']
+
+    # Chirp mass uncertainty is a fixed fraction m0 of the chirp mass
+    trig_mc_det = {'central': mchirp, 'delta': mchirp * coeff['m0']}
+
+    # D_lum = a0 * D_eff and delta_lum = D_lum * exp(b0) * snr ** b1
+    dist_estimation = coeff['a0'] * eff_distance
+    dist_std_estimation = (dist_estimation * math.exp(coeff['b0']) *
+                           snr ** coeff['b1'])
+
+    # The redshift uncertainty is half the spread between the redshifts
+    # at D_lum + delta_lum and D_lum - delta_lum
+    z_estimation = _redshift(dist_estimation)
+    z_est_max = _redshift(dist_estimation + dist_std_estimation)
+    z_est_min = _redshift(dist_estimation - dist_std_estimation)
+    z_std_estimation = 0.5 * (z_est_max - z_est_min)
+    z = {'central': z_estimation, 'delta': z_std_estimation}
+
+    # If the mchirp is greater than the mchirp corresponding to two masses
+    # equal to the maximum mass, the probability for BBH is 100%
+    mc_max = mass_limits['max_m1'] / (2 ** 0.2)
+    if trig_mc_det['central'] > mc_max * (1 + z['central']):
+        # Use the same truthiness test as calc_areas so that both code
+        # paths always return the same set of labels
+        if mass_gap:
+            return {"BNS": 0.0, "GNS": 0.0, "NSBH": 0.0, "GG": 0.0,
+                    "BHG": 0.0, "BBH": 1.0}
+        return {"BNS": 0.0, "NSBH": 0.0, "BBH": 1.0, "Mass Gap": 0.0}
+
+    areas = calc_areas(trig_mc_det, mass_limits, mass_bdary, z, mass_gap)
+    # NOTE(review): a total area of zero would raise ZeroDivisionError here;
+    # confirm calc_areas cannot return all-zero areas for valid triggers
+    total_area = sum(areas.values())
+    return {key: area / total_area for key, area in areas.items()}
diff --git a/pycbc/results/color.py b/pycbc/results/color.py
@@ -10,6 +10,20 @@
     'V1': '#9b59b6',  # magenta/purple
 }
 
+# Colors for the CBC source categories, keyed by the labels used in the
+# source probability dictionaries (both the combined 'Mass Gap' label and
+# the separate 'GNS', 'GG' and 'BHG' labels are covered)
+_source_color_map = {
+    'BNS': '#A2C8F5',   # light blue
+    'NSBH': '#FFB482',  # light orange
+    'BBH': '#FE9F9B',   # light red
+    'Mass Gap': '#8EE5A1',  # light green
+    'GNS': '#98D6CB',   # turquoise
+    'GG': '#79BB87',    # green
+    'BHG': '#C6C29E'    # dark khaki
+}
+
 
 def ifo_color(ifo):
+    """Return the color assigned to the detector name `ifo` in
+    _ifo_color_map. Raises KeyError if the name is not in the map.
+    """
     return _ifo_color_map[ifo]
+
+
+def source_color(source):
+    """Return the hex color string assigned to the CBC source label
+    `source` in _source_color_map. Raises KeyError if the label is not in
+    the map.
+    """
+    return _source_color_map[source]