forked from gwastro/pycbc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding add_statmap code (gwastro#2811)
* adding add_statmap code to utilise different backgrounds for ifar calculation * Cluster over stat instead of ifar. Make the background calculation _much_ faster. * axis needed for np sum in far calculation
- Loading branch information
Showing
2 changed files
with
219 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
#!/bin/env python | ||
""" Calculate total FAR based on statistic ranking for coincidences in times | ||
with more than one ifo combination available. Cluster to keep coincs with the | ||
highest stat value . This clusters to find the most significant foreground, | ||
but leaves the background triggers alone. | ||
""" | ||
|
||
import h5py, numpy as np, argparse, logging, pycbc, pycbc.events, pycbc.io | ||
import pycbc.version | ||
import pycbc.conversions as conv | ||
from pycbc.events import coinc | ||
from ligo import segments | ||
|
||
# Command line interface.
# Fix: the --censor-ifar-threshold help text was built from two adjacent
# string literals with no separating space ("(years)above"), and claimed
# the option only acted "If provided" despite having a default.
parser = argparse.ArgumentParser()
parser.add_argument("--version", action="version",
                    version=pycbc.version.git_verbose_msg)
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--statmap-files', nargs='+',
                    help="List of coinc files to be combined")
parser.add_argument('--censor-ifar-threshold', type=float, default=0.003,
                    help="Only window out foreground triggers with IFAR "
                         "(years) above the threshold [default=0.003yr]")
parser.add_argument('--veto-window', type=float, default=0.1,
                    help="Time around each zerolag trigger to window out "
                         "[default=.1s]")
parser.add_argument('--cluster-window', type=float,
                    help="Maximum time interval to cluster coincident events")
parser.add_argument('--output-file', help="name of output file")
args = parser.parse_args()
|
||
# Turn on logging if requested, then open every input statmap file
# read-only and create the combined output file.
pycbc.init_logging(args.verbose)

files = [h5py.File(fname, 'r') for fname in args.statmap_files]

# All combined results are written into this single HDF5 file
f = h5py.File(args.output_file, "w")
|
||
logging.info('Copying segments and attributes to %s' % args.output_file)
# Move segments information into the final file - remove some duplication
# in earlier files. Also set up dictionaries to contain segments from the
# individual statmap files
indiv_segs = segments.segmentlistdict({})
for fi in files:
    # key is the file's ifo combination with spaces stripped,
    # e.g. 'H1 L1' -> 'H1L1'
    key = fi.attrs['ifos'].replace(' ','')
    starts = fi['segments/{}/start'.format(key)][:]
    ends = fi['segments/{}/end'.format(key)][:]
    # Keep an in-memory segmentlist per combination for later live-time sums
    indiv_segs[key] = pycbc.events.veto.start_end_to_segments(starts, ends)
    f['segments/{}/start'.format(key)] = starts
    f['segments/{}/end'.format(key)] = ends
    # Preserve any per-file foreground veto, namespaced under this
    # combination's key so files cannot collide in the output
    if 'segments/foreground_veto' in fi:
        f['segments/%s/foreground_veto/end' % key] = \
            fi['segments/foreground_veto/end'][:]
        f['segments/%s/foreground_veto/start' % key] = \
            fi['segments/foreground_veto/start'][:]
    # Copy all of this file's attributes onto a top-level group named after
    # its ifo combination (group created on first attribute)
    for attr_name in fi.attrs:
        if key not in f:
            f.create_group(key)
        f[key].attrs[attr_name] = fi.attrs[attr_name]
|
||
logging.info('Combining foreground segments')

# Convert segmentlistdict to a list ('seglists') of segmentlists
# then np.sum(seglists, axis=0) does seglists[0] + seglists[1] + ...
foreground_segs = np.sum(list(indiv_segs.values()), axis=0)
# abs() of a ligo.segments segmentlist is its total duration; stored in
# seconds (GPS-time segments) — NOTE(review): later code treats this attr
# as seconds when converting to years
f.attrs['foreground_time'] = abs(foreground_segs)
|
||
# Gather the full, de-duplicated set of interferometers that appear in any
# of the input coinc_statmap files
ifo_lists = [fi.attrs['ifos'].split(' ') for fi in files]
all_ifos = np.unique([ifo for ifo_list in ifo_lists for ifo in ifo_list])

logging.info('Copying foreground datasets')
# Copy the shared foreground columns across all inputs; skip fap columns
# (recomputed at the end) and per-ifo groups (collated separately below)
for dset_name in files[0]['foreground']:
    if dset_name.startswith('fap') or dset_name in all_ifos:
        continue
    pycbc.io.combine_and_copy(f, files, 'foreground/' + dset_name)
|
||
logging.info('Collating triggers into single structure')

# Per-ifo accumulators for trigger times and ids across all input files;
# each starts empty and grows by concatenation in the loop below
all_trig_times = {ifo: np.array([], dtype=np.uint32) for ifo in all_ifos}
all_trig_ids = {ifo: np.array([], dtype=np.uint32) for ifo in all_ifos}
|
||
# For each file, append the trigger time and id data for each ifo.
# If an ifo does not participate in a given coinc, fill with -1 sentinel
# values (mean_if_greater_than_zero later ignores non-positive entries).
for f_in in files:
    # One sentinel entry per foreground coinc in this file.
    # Fix: the previous '-1 * np.ones_like(..., dtype=np.uint32)' relied on
    # NumPy 1.x value-based casting to promote to signed int64; under NEP 50
    # (NumPy 2) the Python -1 is cast to uint32 and wraps/raises, destroying
    # the sentinel.  An explicit signed fill gives the same int64 -1 values
    # on all NumPy versions.
    missing = np.full(f_in['foreground/fap'].size, -1, dtype=np.int64)
    for ifo in all_ifos:
        if ifo in f_in['foreground']:
            all_trig_times[ifo] = np.concatenate(
                [all_trig_times[ifo],
                 f_in['foreground/{}/time'.format(ifo)][:]])
            all_trig_ids[ifo] = np.concatenate(
                [all_trig_ids[ifo],
                 f_in['foreground/{}/trigger_id'.format(ifo)][:]])
        else:
            all_trig_times[ifo] = np.concatenate([all_trig_times[ifo],
                                                  missing])
            all_trig_ids[ifo] = np.concatenate([all_trig_ids[ifo],
                                                missing])
|
||
# Persist the collated per-ifo trigger times and ids into the output file
for ifo in all_ifos:
    f['foreground/%s/time' % ifo] = all_trig_times[ifo]
    f['foreground/%s/trigger_id' % ifo] = all_trig_ids[ifo]
|
||
n_triggers = f['foreground/ifar'].size
logging.info('{} triggers'.format(n_triggers))

# all_times is a generator yielding one trigger-time array per ifo
# (each of length n_triggers; -1 marks ifos absent from a given coinc)
all_times = (f['foreground/%s/time' % ifo][:] for ifo in all_ifos)

# Cluster by statistic value. Currently only clustering zerolag,
# i.e. foreground, so set all timeslide_ids to zero
# cidx: indices of the triggers kept after clustering
cidx = pycbc.events.cluster_coincs_multiifo(f['foreground/stat'][:], all_times,
                                            np.zeros(n_triggers), 0,
                                            args.cluster_window)
|
||
|
||
def filter_dataset(h5file, name, idx):
    """Keep only the entries of dataset ``name`` selected by ``idx``.

    The dataset changes size, so it must be deleted from the file and
    written back rather than modified in place.
    """
    kept = h5file[name][:][idx]
    del h5file[name]
    h5file[name] = kept
|
||
# Downsample the foreground columns to only the loudest ifar between the
# multiple files.
# Fix: filter_dataset deletes and re-creates datasets, so snapshot the key
# collections with list() first — mutating an h5py group while iterating
# its live key view is unsafe.
for key in list(f['foreground'].keys()):
    if key not in all_ifos:
        filter_dataset(f, 'foreground/%s' % key, cidx)
    else:  # key is an ifo group holding time/trigger_id datasets
        for k in list(f['foreground/%s' % key].keys()):
            filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

# Re-count the triggers remaining after clustering
n_triggers = f['foreground/ifar'].size
|
||
# Calculate a representative event time for each coinc (mean of the
# participating, i.e. positive, per-ifo times) to determine which coinc
# types are available at that moment.
# Fix: read each time dataset into memory once with [:] — zipping over bare
# h5py datasets triggers a slow per-element HDF5 read for every row.
times_arrays = [f['foreground/{}/time'.format(ifo)][:] for ifo in all_ifos]
test_times = np.array([pycbc.events.mean_if_greater_than_zero(tc)[0]
                       for tc in zip(*times_arrays)])
|
||
# For each ifo-combination segment list, flag (1.0) the events whose
# representative time lies inside that combination's observing time.
# Fix: the zero array was previously allocated before the skip guard, so
# veto bookkeeping keys polluted the dict with never-true entries; allocate
# only for real combination keys.
is_in_combo_time = {}
for key in f['segments']:
    if key.startswith('foreground') or key.startswith('background'):
        continue
    is_in_combo_time[key] = np.zeros(n_triggers)
    end_times = np.array(f['segments/%s/end' % key][:])
    start_times = np.array(f['segments/%s/start' % key][:])
    idx_within_segment = pycbc.events.indices_within_times(test_times,
                                                           start_times,
                                                           end_times)
    is_in_combo_time[key][idx_within_segment] = 1
    del idx_within_segment
|
||
# available_combos[i] is a space-separated, sorted list (as one utf8-encoded
# string) of the interferometer combinations available at the time of the
# i-th coincidence
available_combos = []
for i in np.arange(n_triggers):
    combos_here = sorted(key for key in is_in_combo_time
                         if is_in_combo_time[key][i])
    available_combos.append(' '.join(combos_here).encode('utf8'))

# Group events by their available-combination string: idx[ct] holds the
# trigger indices whose combination string equals ct
all_combo_types = np.unique(available_combos)
idx = {ct: np.where(np.array(available_combos) == ct)[0]
       for ct in all_combo_types}

del available_combos
|
||
logging.info('Calculating false alarm rate over all coinc types for foreground events')

# False alarm rate of every foreground event evaluated against each input
# file's background, keyed by that file's ifo combination (e.g. 'H1L1').
# The '_exc' variants use the exclusive background datasets.
far = {}
far_exc = {}
# Hoisted loop invariant: the original re-read the full foreground stat
# dataset twice per input file
fg_stat = f['foreground/stat'][:]
for f_in in files:
    ifo_combo_key = f_in.attrs['ifos'].replace(' ','')
    _, fnlouder = coinc.calculate_n_louder(
        f_in['background/stat'][:], fg_stat,
        f_in['background/decimation_factor'][:])
    # +1 counts the event itself; dividing by the background livetime
    # attribute gives a rate
    far[ifo_combo_key] = (fnlouder + 1) / f_in.attrs['background_time']
    _, fnlouder_exc = coinc.calculate_n_louder(
        f_in['background_exc/stat'][:], fg_stat,
        f_in['background_exc/decimation_factor'][:])
    far_exc[ifo_combo_key] = \
        (fnlouder_exc + 1) / f_in.attrs['background_time_exc']
|
||
fg_ifar = np.zeros(n_triggers)
fg_ifar_exc = np.zeros(n_triggers)

for ct in all_combo_types:
    # Fix: all_combo_types entries are numpy bytes (the combo strings were
    # utf8-encoded before np.unique), and bytes.split(' ') raises TypeError
    # on Python 3 — decode to str before splitting.
    ct_str = ct.decode('utf8') if isinstance(ct, bytes) else ct
    cts = ct_str.split(' ')
    if len(cts) == 1:
        logging.info('IFAR is the same as previously calculated for '
                     'triggers in {} time'.format(ct_str))
        # If only one combination is available, then the stat is the same
        # as previously calculated
        fg_ifar[idx[ct]] = f['foreground/ifar'][:][idx[ct]]
        fg_ifar_exc[idx[ct]] = f['foreground/ifar_exc'][:][idx[ct]]
    elif len(cts) > 1:
        logging.info('Recalculating ifar for coincidences which are in '
                     '{} time'.format(ct_str))
        # Total FAR is the sum of the FARs under each available background
        far_new = np.sum([far[combo][idx[ct]] for combo in cts], axis=0)
        far_new_exc = np.sum([far_exc[combo][idx[ct]] for combo in cts],
                             axis=0)
        # FARs are per second; store IFARs in years
        fg_ifar[idx[ct]] = conv.sec_to_year(1. / far_new)
        fg_ifar_exc[idx[ct]] = conv.sec_to_year(1. / far_new_exc)
    else:
        raise RuntimeError('Empty combo type string, something has gone wrong')
|
||
# Exclusive foreground time starts from the full foreground time; the
# censored windows are subtracted from it below
f.attrs['foreground_time_exc'] = f.attrs['foreground_time']

# Construct the foreground censor veto from the clustered candidate times
# above the ifar threshold
thr = test_times[fg_ifar > args.censor_ifar_threshold]
vstart = thr - args.veto_window
vend = thr + args.veto_window
vtime = segments.segmentlist([segments.segment(s, e)
                              for s, e in zip(vstart, vend)])
# Fix: coalesce before measuring — candidates closer together than twice
# the veto window produce overlapping segments, which abs() would otherwise
# double-count when removing time from foreground_time_exc
vtime.coalesce()
logging.info('Censoring %.2f seconds', abs(vtime))
f.attrs['foreground_time_exc'] -= abs(vtime)
f['segments/foreground_veto/start'] = vstart
f['segments/foreground_veto/end'] = vend
|
||
# Write out the recalculated significance.
# Fix: fg_ifar is in years (conv.sec_to_year above) while the
# foreground_time attributes are livetimes in seconds, so the FAP formula
# fap = 1 - exp(-T / ifar) must convert the livetime to years; the
# unconverted ratio saturated every fap at ~1.
f['foreground/ifar'][:] = fg_ifar
f['foreground/fap'] = \
    1 - np.exp(-conv.sec_to_year(f.attrs['foreground_time']) / fg_ifar)
f['foreground/ifar_exc'][:] = fg_ifar_exc
f['foreground/fap_exc'] = \
    1 - np.exp(-conv.sec_to_year(f.attrs['foreground_time_exc']) /
               fg_ifar_exc)

f.close()
logging.info('Done!')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters