Skip to content

Commit

Permalink
adding add_statmap code (gwastro#2811)
Browse files Browse the repository at this point in the history
* adding add_statmap code to utilise different backgrounds for ifar calculation

* Cluster over stat instead of ifar. Make the background calculation _much_ faster.

* axis needed for np sum in far calculation
  • Loading branch information
Gareth Davies authored and OliverEdy committed Apr 3, 2023
1 parent 5f70867 commit 825518e
Show file tree
Hide file tree
Showing 2 changed files with 219 additions and 2 deletions.
217 changes: 217 additions & 0 deletions bin/hdfcoinc/pycbc_multiifo_add_statmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
#!/bin/env python
""" Calculate total FAR based on statistic ranking for coincidences in times
with more than one ifo combination available. Cluster to keep coincs with the
highest stat value. This clusters to find the most significant foreground,
but leaves the background triggers alone.
"""

import h5py, numpy as np, argparse, logging, pycbc, pycbc.events, pycbc.io
import pycbc.version
import pycbc.conversions as conv
from pycbc.events import coinc
from ligo import segments

# Command-line interface: each --statmap-files input covers one ifo
# combination; the output combines them with IFARs recalculated over all
# combinations available at each candidate's time.
parser = argparse.ArgumentParser()
parser.add_argument("--version", action="version", version=pycbc.version.git_verbose_msg)
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--statmap-files', nargs='+',
                    help="List of coinc files to be combined")
# NOTE: despite the "If provided" wording in the help text, the default of
# 0.003 yr always applies when the option is omitted
parser.add_argument('--censor-ifar-threshold', type=float, default=0.003,
                    help="If provided, only window out foreground triggers with IFAR (years)"
                    "above the threshold [default=0.003yr]")
parser.add_argument('--veto-window', type=float, default=0.1,
                    help="Time around each zerolag trigger to window out [default=.1s]")
parser.add_argument('--cluster-window', type=float,
                    help="Maximum time interval to cluster coincident events")
parser.add_argument('--output-file', help="name of output file")
args = parser.parse_args()

pycbc.init_logging(args.verbose)

# Open all input statmap files read-only; each covers one ifo combination
files = [h5py.File(n, 'r') for n in args.statmap_files]

# Output file is created fresh (mode "w" truncates any existing file)
f = h5py.File(args.output_file, "w")

logging.info('Copying segments and attributes to %s' % args.output_file)
# Move segments information into the final file - remove some duplication
# in earlier files. Also set up dictionaries to contain segments from the
# individual statmap files
indiv_segs = segments.segmentlistdict({})
for fi in files:
    # Combination key is the ifo list with spaces removed, e.g. 'H1L1'
    key = fi.attrs['ifos'].replace(' ','')
    starts = fi['segments/{}/start'.format(key)][:]
    ends = fi['segments/{}/end'.format(key)][:]
    indiv_segs[key] = pycbc.events.veto.start_end_to_segments(starts, ends)
    f['segments/{}/start'.format(key)] = starts
    f['segments/{}/end'.format(key)] = ends
    # Preserve any per-file foreground veto segments, namespaced by combo key
    if 'segments/foreground_veto' in fi:
        f['segments/%s/foreground_veto/end' % key] = \
            fi['segments/foreground_veto/end'][:]
        f['segments/%s/foreground_veto/start' % key] = \
            fi['segments/foreground_veto/start'][:]
    # Copy each input file's attributes into a group named after its ifo
    # combination so per-file provenance is retained in the output
    for attr_name in fi.attrs:
        if key not in f:
            f.create_group(key)
        f[key].attrs[attr_name] = fi.attrs[attr_name]

logging.info('Combining foreground segments')

# Convert segmentlistdict to a list ('seglists') of segmentlists
# then np.sum(seglists, axis=0) does seglists[0] + seglists[1] + ...
foreground_segs = np.sum(list(indiv_segs.values()), axis=0)
# abs() of a segmentlist is its total livetime (seconds)
f.attrs['foreground_time'] = abs(foreground_segs)

# obtain list of all ifos involved in the coinc_statmap files
all_ifos = np.unique([ifo for fi in files
                      for ifo in fi.attrs['ifos'].split(' ')])

logging.info('Copying foreground datasets')
# Copy per-candidate datasets shared by all files; skip fap* (recomputed
# at the end of this script) and the per-ifo groups (collated below)
for k in files[0]['foreground']:
    if not k.startswith('fap') and k not in all_ifos:
        pycbc.io.combine_and_copy(f, files, 'foreground/' + k)

logging.info('Collating triggers into single structure')

# Per-ifo accumulators for trigger times and ids across all input files
all_trig_times = {}
all_trig_ids = {}
for ifo in all_ifos:
    all_trig_times[ifo] = np.array([], dtype=np.uint32)
    all_trig_ids[ifo] = np.array([], dtype=np.uint32)

# For each file, append the trigger time and id data for each ifo
# If an ifo does not participate in any given coinc then fill with -1 values
# NOTE(review): -1 * a uint32 array relies on numpy type promotion to stay
# negative, and the first concatenate with real time data upcasts the
# uint32 accumulator - confirm the intended sentinel dtype behaviour.
for f_in in files:
    for ifo in all_ifos:
        if ifo in f_in['foreground']:
            all_trig_times[ifo] = np.concatenate([all_trig_times[ifo], \
                f_in['foreground/{}/time'.format(ifo)][:]])
            all_trig_ids[ifo] = np.concatenate([all_trig_ids[ifo],
                f_in['foreground/{}/trigger_id'.format(ifo)][:]])
        else:
            # The fap dataset is used only to size the padding: one filler
            # entry per coincidence in this file
            all_trig_times[ifo] = np.concatenate([all_trig_times[ifo],
                -1*np.ones_like(f_in['foreground/fap'][:],
                                dtype=np.uint32)])
            all_trig_ids[ifo] = np.concatenate([all_trig_ids[ifo],
                -1*np.ones_like(f_in['foreground/fap'][:],
                                dtype=np.uint32)])

# Write the collated per-ifo time/id arrays into the output file
for ifo in all_ifos:
    f['foreground/{}/time'.format(ifo)] = all_trig_times[ifo]
    f['foreground/{}/trigger_id'.format(ifo)] = all_trig_ids[ifo]

# Number of (not yet clustered) foreground candidates
n_triggers = f['foreground/ifar'].size
logging.info('{} triggers'.format(n_triggers))

# all_times is a generator yielding one trigger-time array per ifo
# (cluster_coincs_multiifo consumes it once)
all_times = (f['foreground/%s/time' % ifo][:] for ifo in all_ifos)

# Cluster by statistic value. Currently only clustering zerolag,
# i.e. foreground, so set all timeslide_ids to zero
cidx = pycbc.events.cluster_coincs_multiifo(f['foreground/stat'][:], all_times,
                                            np.zeros(n_triggers), 0,
                                            args.cluster_window)


def filter_dataset(h5file, name, idx):
    """Keep only the rows of dataset ``name`` selected by ``idx``.

    The filtered dataset has a different size, so it cannot be assigned
    in place: remove the old dataset and store the reduced copy instead.
    """
    kept = h5file[name][:][idx]
    del h5file[name]
    h5file[name] = kept

# Downsample the foreground columns to only the loudest ifar between the
# multiple files
# Snapshot the key lists before filtering: filter_dataset deletes and
# re-creates datasets inside the group being iterated, and mutating an
# h5py group while walking its live KeysView is not safe.
for key in list(f['foreground'].keys()):
    if key not in all_ifos:
        filter_dataset(f, 'foreground/%s' % key, cidx)
    else:  # key is an ifo: filter its time/trigger_id sub-datasets
        for k in list(f['foreground/%s' % key].keys()):
            filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

# Number of candidates surviving clustering
n_triggers = f['foreground/ifar'].size

# Calculate a representative event time per candidate, used to determine
# which ifo combinations were observing at that moment
times_tuple = (f['foreground/{}/time'.format(ifo)] for ifo in all_ifos)
test_times = np.array([pycbc.events.mean_if_greater_than_zero(tc)[0]
                       for tc in zip(*times_tuple)])

# For each ifo-combination key, flag (0/1) which candidates fall inside
# that combination's analysed segments
is_in_combo_time = {}
for key in f['segments']:
    # Skip veto/bookkeeping segment groups BEFORE creating a dict entry;
    # previously a zero array was stored for these keys too, so spurious
    # non-combination keys entered is_in_combo_time (harmless for the
    # output, since all-zero rows never join a combo string, but wrong).
    if key.startswith('foreground') or key.startswith('background'):
        continue
    is_in_combo_time[key] = np.zeros(n_triggers)
    end_times = np.array(f['segments/%s/end' % key][:])
    start_times = np.array(f['segments/%s/start' % key][:])
    idx_within_segment = pycbc.events.indices_within_times(test_times,
                                                           start_times,
                                                           end_times)
    is_in_combo_time[key][idx_within_segment] = np.ones_like(idx_within_segment)
    del idx_within_segment

# available_combos[i] is a space-separated, sorted string of which
# interferometer combinations are available at the time of candidate i
available_combos = [' '.join(sorted([key for key in is_in_combo_time
                                     if is_in_combo_time[key][i]])
                             ).encode('utf8') for i in np.arange(n_triggers)]

all_combo_types = np.unique(available_combos)
# Map each distinct combination-type string to the candidate indices in it
idx = {ct: np.where(np.array(available_combos) == ct)[0]
       for ct in all_combo_types}

del available_combos

logging.info('Calculating false alarm rate over all coinc types for foreground events')

# For each input file (i.e. each ifo combination) evaluate the false alarm
# rate of every surviving candidate against that file's background, both
# inclusive and exclusive ('_exc') versions
far = {}
far_exc = {}
for f_in in files:
    ifo_combo_key = f_in.attrs['ifos'].replace(' ','')
    # Count of (decimation-weighted) louder background events; +1 keeps
    # the FAR strictly positive
    _, fnlouder = coinc.calculate_n_louder(f_in['background/stat'][:],
                                           f['foreground/stat'][:],
                                           f_in['background/decimation_factor'][:])
    far[ifo_combo_key] = (fnlouder + 1) / f_in.attrs['background_time']
    _, fnlouder_exc = coinc.calculate_n_louder(f_in['background_exc/stat'][:],
                                               f['foreground/stat'][:],
                                               f_in['background_exc/decimation_factor'][:])
    far_exc[ifo_combo_key] = (fnlouder_exc + 1) / f_in.attrs['background_time_exc']

# Final per-candidate IFARs (years), filled in combo-type by combo-type
fg_ifar = np.zeros(n_triggers)
fg_ifar_exc = np.zeros(n_triggers)

for ct in all_combo_types:
    # NOTE(review): all_combo_types holds utf8-encoded byte strings; under
    # python3, bytes.split(' ') with a str separator raises TypeError -
    # confirm this runs under python2 semantics or decode ct first.
    cts = ct.split(' ')
    if len(cts) == 1:
        logging.info('IFAR is the same as previously calculated for triggers in {} time'.format(ct))
        # If only one combination is available, then the stat is the same
        # as previously calculated
        fg_ifar[idx[ct]] = f['foreground/ifar'][:][idx[ct]]
        fg_ifar_exc[idx[ct]] = f['foreground/ifar_exc'][:][idx[ct]]
    elif len(cts) > 1:
        logging.info('Recalculating ifar for coincidences which are in {} time'.format(ct))
        # FARs of independent combinations add; invert the summed rate and
        # convert seconds to years for the IFAR
        far_new = np.sum([far[ifo_combo_key][idx[ct]] for ifo_combo_key in cts], axis=0)
        far_new_exc = np.sum([far_exc[ifo_combo_key][idx[ct]] for ifo_combo_key in cts], axis=0)
        fg_ifar[idx[ct]] = conv.sec_to_year(1. / np.array(far_new))
        fg_ifar_exc[idx[ct]] = conv.sec_to_year(1. / np.array(far_new_exc))
    else:
        raise RuntimeError('Empty combo type string, something has gone wrong')

# Exclusive foreground time starts equal to the inclusive time and is
# reduced below by the censor veto livetime
f.attrs['foreground_time_exc'] = f.attrs['foreground_time']

# Construct the foreground censor veto from the clustered candidate times
# above the ifar threshold
thr = test_times[fg_ifar > args.censor_ifar_threshold]
vstart = thr - args.veto_window
vend = thr + args.veto_window
vtime = segments.segmentlist([segments.segment(s, e)
                              for s, e in zip(vstart, vend)])
logging.info('Censoring %.2f seconds', abs(vtime))
f.attrs['foreground_time_exc'] -= abs(vtime)
f['segments/foreground_veto/start'] = vstart
f['segments/foreground_veto/end'] = vend

# Overwrite ifar in place (same size) and create the recomputed fap
# datasets. NOTE(review): foreground_time is in seconds (abs of a
# segmentlist) while fg_ifar is in years - confirm whether a sec_to_year
# conversion is needed in the fap exponent as in other statmap codes.
f['foreground/ifar'][:] = fg_ifar
f['foreground/fap'] = 1 - np.exp(-f.attrs['foreground_time'] / fg_ifar)
f['foreground/ifar_exc'][:] = fg_ifar_exc
f['foreground/fap_exc'] = 1 - np.exp(-f.attrs['foreground_time_exc'] / fg_ifar_exc)

f.close()
logging.info('Done!')
4 changes: 2 additions & 2 deletions pycbc/events/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,8 @@ def reassign_rate(self, ifo):
def coinc_multiifo(self, s, slide,
step, **kwargs): # pylint:disable=unused-argument
"""Calculate the final coinc ranking statistic"""
sngl_rates_dict = {ifo: numpy.exp(sngl_rate) for (ifo, sngl_rate) in\
zip(self.fits_by_tid.keys(), s.values())}
sngl_rates_dict = {ifo: numpy.exp(log_rate) for (ifo, log_rate)\
in s.items()}
ln_coinc_rate = numpy.log(coinc_rate.combination_noise_coinc_rate(
sngl_rates_dict, kwargs['time_addition']))
loglr = - ln_coinc_rate + self.benchmark_lograte
Expand Down

0 comments on commit 825518e

Please sign in to comment.