Skip to content

Commit

Permalink
adding add_statmap code (gwastro#2811)
Browse files Browse the repository at this point in the history
* adding add_statmap code to utilise different backgrounds for ifar calculation

* Cluster over stat instead of ifar. Make the background calculation _much_ faster.

* axis needed for np sum in far calculation
  • Loading branch information
Gareth Davies authored and OliverEdy committed Apr 3, 2023
1 parent 5f70867 commit 825518e
Show file tree
Hide file tree
Showing 2 changed files with 219 additions and 2 deletions.
217 changes: 217 additions & 0 deletions bin/hdfcoinc/pycbc_multiifo_add_statmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
#!/bin/env python
""" Calculate total FAR based on statistic ranking for coincidences in times
with more than one ifo combination available. Cluster to keep coincs with the
highest stat value. This clusters to find the most significant foreground,
but leaves the background triggers alone.
"""

import h5py, numpy as np, argparse, logging, pycbc, pycbc.events, pycbc.io
import pycbc.version
import pycbc.conversions as conv
from pycbc.events import coinc
from ligo import segments

# Command-line interface: each --statmap-files input covers one ifo
# combination; the output combines them with IFARs recalculated over all
# combinations available at each candidate's time.
parser = argparse.ArgumentParser()
parser.add_argument("--version", action="version", version=pycbc.version.git_verbose_msg)
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--statmap-files', nargs='+',
                    help="List of coinc files to be combined")
# NOTE: despite the "If provided" wording in the help text, the default of
# 0.003 yr always applies when the option is omitted
parser.add_argument('--censor-ifar-threshold', type=float, default=0.003,
                    help="If provided, only window out foreground triggers with IFAR (years)"
                    "above the threshold [default=0.003yr]")
parser.add_argument('--veto-window', type=float, default=0.1,
                    help="Time around each zerolag trigger to window out [default=.1s]")
parser.add_argument('--cluster-window', type=float,
                    help="Maximum time interval to cluster coincident events")
parser.add_argument('--output-file', help="name of output file")
args = parser.parse_args()

pycbc.init_logging(args.verbose)

# Open all input statmap files read-only; each covers one ifo combination
files = [h5py.File(n, 'r') for n in args.statmap_files]

# Output file is created fresh (mode "w" truncates any existing file)
f = h5py.File(args.output_file, "w")

logging.info('Copying segments and attributes to %s' % args.output_file)
# Move segments information into the final file - remove some duplication
# in earlier files. Also set up dictionaries to contain segments from the
# individual statmap files
indiv_segs = segments.segmentlistdict({})
for fi in files:
    # Combination key is the ifo list with spaces removed, e.g. 'H1L1'
    key = fi.attrs['ifos'].replace(' ','')
    starts = fi['segments/{}/start'.format(key)][:]
    ends = fi['segments/{}/end'.format(key)][:]
    indiv_segs[key] = pycbc.events.veto.start_end_to_segments(starts, ends)
    f['segments/{}/start'.format(key)] = starts
    f['segments/{}/end'.format(key)] = ends
    # Preserve any per-file foreground veto segments, namespaced by combo key
    if 'segments/foreground_veto' in fi:
        f['segments/%s/foreground_veto/end' % key] = \
            fi['segments/foreground_veto/end'][:]
        f['segments/%s/foreground_veto/start' % key] = \
            fi['segments/foreground_veto/start'][:]
    # Copy each input file's attributes into a group named after its ifo
    # combination so per-file provenance is retained in the output
    for attr_name in fi.attrs:
        if key not in f:
            f.create_group(key)
        f[key].attrs[attr_name] = fi.attrs[attr_name]

logging.info('Combining foreground segments')

# Convert segmentlistdict to a list ('seglists') of segmentlists
# then np.sum(seglists, axis=0) does seglists[0] + seglists[1] + ...
foreground_segs = np.sum(list(indiv_segs.values()), axis=0)
# abs() of a segmentlist is its total livetime (seconds)
f.attrs['foreground_time'] = abs(foreground_segs)

# obtain list of all ifos involved in the coinc_statmap files
all_ifos = np.unique([ifo for fi in files
                      for ifo in fi.attrs['ifos'].split(' ')])

logging.info('Copying foreground datasets')
# Copy per-candidate datasets shared by all files; skip fap* (recomputed
# at the end of this script) and the per-ifo groups (collated below)
for k in files[0]['foreground']:
    if not k.startswith('fap') and k not in all_ifos:
        pycbc.io.combine_and_copy(f, files, 'foreground/' + k)

logging.info('Collating triggers into single structure')

# Per-ifo accumulators for trigger times and ids across all input files
all_trig_times = {}
all_trig_ids = {}
for ifo in all_ifos:
    all_trig_times[ifo] = np.array([], dtype=np.uint32)
    all_trig_ids[ifo] = np.array([], dtype=np.uint32)

# For each file, append the trigger time and id data for each ifo
# If an ifo does not participate in any given coinc then fill with -1 values
# NOTE(review): -1 * a uint32 array relies on numpy type promotion to stay
# negative, and the first concatenate with real time data upcasts the
# uint32 accumulator - confirm the intended sentinel dtype behaviour.
for f_in in files:
    for ifo in all_ifos:
        if ifo in f_in['foreground']:
            all_trig_times[ifo] = np.concatenate([all_trig_times[ifo], \
                f_in['foreground/{}/time'.format(ifo)][:]])
            all_trig_ids[ifo] = np.concatenate([all_trig_ids[ifo],
                f_in['foreground/{}/trigger_id'.format(ifo)][:]])
        else:
            # The fap dataset is used only to size the padding: one filler
            # entry per coincidence in this file
            all_trig_times[ifo] = np.concatenate([all_trig_times[ifo],
                -1*np.ones_like(f_in['foreground/fap'][:],
                                dtype=np.uint32)])
            all_trig_ids[ifo] = np.concatenate([all_trig_ids[ifo],
                -1*np.ones_like(f_in['foreground/fap'][:],
                                dtype=np.uint32)])

# Write the collated per-ifo time/id arrays into the output file
for ifo in all_ifos:
    f['foreground/{}/time'.format(ifo)] = all_trig_times[ifo]
    f['foreground/{}/trigger_id'.format(ifo)] = all_trig_ids[ifo]

# Number of (not yet clustered) foreground candidates
n_triggers = f['foreground/ifar'].size
logging.info('{} triggers'.format(n_triggers))

# all_times is a generator yielding one trigger-time array per ifo
# (cluster_coincs_multiifo consumes it once)
all_times = (f['foreground/%s/time' % ifo][:] for ifo in all_ifos)

# Cluster by statistic value. Currently only clustering zerolag,
# i.e. foreground, so set all timeslide_ids to zero
cidx = pycbc.events.cluster_coincs_multiifo(f['foreground/stat'][:], all_times,
                                            np.zeros(n_triggers), 0,
                                            args.cluster_window)


def filter_dataset(h5file, name, idx):
    """Keep only the rows of dataset ``name`` selected by ``idx``.

    The filtered dataset has a different size, so it cannot be assigned
    in place: remove the old dataset and store the reduced copy instead.
    """
    kept = h5file[name][:][idx]
    del h5file[name]
    h5file[name] = kept

# Downsample the foreground columns to only the loudest ifar between the
# multiple files
# Snapshot the key lists before filtering: filter_dataset deletes and
# re-creates datasets inside the group being iterated, and mutating an
# h5py group while walking its live KeysView is not safe.
for key in list(f['foreground'].keys()):
    if key not in all_ifos:
        filter_dataset(f, 'foreground/%s' % key, cidx)
    else:  # key is an ifo: filter its time/trigger_id sub-datasets
        for k in list(f['foreground/%s' % key].keys()):
            filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

# Number of candidates surviving clustering
n_triggers = f['foreground/ifar'].size

# Calculate a representative event time per candidate, used to determine
# which ifo combinations were observing at that moment
times_tuple = (f['foreground/{}/time'.format(ifo)] for ifo in all_ifos)
test_times = np.array([pycbc.events.mean_if_greater_than_zero(tc)[0]
                       for tc in zip(*times_tuple)])

# For each ifo-combination key, flag (0/1) which candidates fall inside
# that combination's analysed segments
is_in_combo_time = {}
for key in f['segments']:
    # Skip veto/bookkeeping segment groups BEFORE creating a dict entry;
    # previously a zero array was stored for these keys too, so spurious
    # non-combination keys entered is_in_combo_time (harmless for the
    # output, since all-zero rows never join a combo string, but wrong).
    if key.startswith('foreground') or key.startswith('background'):
        continue
    is_in_combo_time[key] = np.zeros(n_triggers)
    end_times = np.array(f['segments/%s/end' % key][:])
    start_times = np.array(f['segments/%s/start' % key][:])
    idx_within_segment = pycbc.events.indices_within_times(test_times,
                                                           start_times,
                                                           end_times)
    is_in_combo_time[key][idx_within_segment] = np.ones_like(idx_within_segment)
    del idx_within_segment

# available_combos[i] is a space-separated, sorted string of which
# interferometer combinations are available at the time of candidate i
available_combos = [' '.join(sorted([key for key in is_in_combo_time
                                     if is_in_combo_time[key][i]])
                             ).encode('utf8') for i in np.arange(n_triggers)]

all_combo_types = np.unique(available_combos)
# Map each distinct combination-type string to the candidate indices in it
idx = {ct: np.where(np.array(available_combos) == ct)[0]
       for ct in all_combo_types}

del available_combos

logging.info('Calculating false alarm rate over all coinc types for foreground events')

# For each input file (i.e. each ifo combination) evaluate the false alarm
# rate of every surviving candidate against that file's background, both
# inclusive and exclusive ('_exc') versions
far = {}
far_exc = {}
for f_in in files:
    ifo_combo_key = f_in.attrs['ifos'].replace(' ','')
    # Count of (decimation-weighted) louder background events; +1 keeps
    # the FAR strictly positive
    _, fnlouder = coinc.calculate_n_louder(f_in['background/stat'][:],
                                           f['foreground/stat'][:],
                                           f_in['background/decimation_factor'][:])
    far[ifo_combo_key] = (fnlouder + 1) / f_in.attrs['background_time']
    _, fnlouder_exc = coinc.calculate_n_louder(f_in['background_exc/stat'][:],
                                               f['foreground/stat'][:],
                                               f_in['background_exc/decimation_factor'][:])
    far_exc[ifo_combo_key] = (fnlouder_exc + 1) / f_in.attrs['background_time_exc']

# Final per-candidate IFARs (years), filled in combo-type by combo-type
fg_ifar = np.zeros(n_triggers)
fg_ifar_exc = np.zeros(n_triggers)

for ct in all_combo_types:
    # NOTE(review): all_combo_types holds utf8-encoded byte strings; under
    # python3, bytes.split(' ') with a str separator raises TypeError -
    # confirm this runs under python2 semantics or decode ct first.
    cts = ct.split(' ')
    if len(cts) == 1:
        logging.info('IFAR is the same as previously calculated for triggers in {} time'.format(ct))
        # If only one combination is available, then the stat is the same
        # as previously calculated
        fg_ifar[idx[ct]] = f['foreground/ifar'][:][idx[ct]]
        fg_ifar_exc[idx[ct]] = f['foreground/ifar_exc'][:][idx[ct]]
    elif len(cts) > 1:
        logging.info('Recalculating ifar for coincidences which are in {} time'.format(ct))
        # FARs of independent combinations add; invert the summed rate and
        # convert seconds to years for the IFAR
        far_new = np.sum([far[ifo_combo_key][idx[ct]] for ifo_combo_key in cts], axis=0)
        far_new_exc = np.sum([far_exc[ifo_combo_key][idx[ct]] for ifo_combo_key in cts], axis=0)
        fg_ifar[idx[ct]] = conv.sec_to_year(1. / np.array(far_new))
        fg_ifar_exc[idx[ct]] = conv.sec_to_year(1. / np.array(far_new_exc))
    else:
        raise RuntimeError('Empty combo type string, something has gone wrong')

# Exclusive foreground time starts equal to the inclusive time and is
# reduced below by the censor veto livetime
f.attrs['foreground_time_exc'] = f.attrs['foreground_time']

# Construct the foreground censor veto from the clustered candidate times
# above the ifar threshold
thr = test_times[fg_ifar > args.censor_ifar_threshold]
vstart = thr - args.veto_window
vend = thr + args.veto_window
vtime = segments.segmentlist([segments.segment(s, e)
                              for s, e in zip(vstart, vend)])
logging.info('Censoring %.2f seconds', abs(vtime))
f.attrs['foreground_time_exc'] -= abs(vtime)
f['segments/foreground_veto/start'] = vstart
f['segments/foreground_veto/end'] = vend

# Overwrite ifar in place (same size) and create the recomputed fap
# datasets. NOTE(review): foreground_time is in seconds (abs of a
# segmentlist) while fg_ifar is in years - confirm whether a sec_to_year
# conversion is needed in the fap exponent as in other statmap codes.
f['foreground/ifar'][:] = fg_ifar
f['foreground/fap'] = 1 - np.exp(-f.attrs['foreground_time'] / fg_ifar)
f['foreground/ifar_exc'][:] = fg_ifar_exc
f['foreground/fap_exc'] = 1 - np.exp(-f.attrs['foreground_time_exc'] / fg_ifar_exc)

f.close()
logging.info('Done!')
4 changes: 2 additions & 2 deletions pycbc/events/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,8 @@ def reassign_rate(self, ifo):
def coinc_multiifo(self, s, slide,
step, **kwargs): # pylint:disable=unused-argument
"""Calculate the final coinc ranking statistic"""
sngl_rates_dict = {ifo: numpy.exp(sngl_rate) for (ifo, sngl_rate) in\
zip(self.fits_by_tid.keys(), s.values())}
sngl_rates_dict = {ifo: numpy.exp(log_rate) for (ifo, log_rate)\
in s.items()}
ln_coinc_rate = numpy.log(coinc_rate.combination_noise_coinc_rate(
sngl_rates_dict, kwargs['time_addition']))
loglr = - ln_coinc_rate + self.benchmark_lograte
Expand Down

0 comments on commit 825518e

Please sign in to comment.