Skip to content

Commit

Permalink
Cluster over stat instead of ifar. Make the background calculation _much_ faster.
Browse files Browse the repository at this point in the history
  • Loading branch information
Gareth Davies committed Jul 2, 2019
1 parent 2a84923 commit 618c6d7
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 103 deletions.
164 changes: 66 additions & 98 deletions bin/hdfcoinc/pycbc_multiifo_add_statmap
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ parser.add_argument("--version", action="version", version=pycbc.version.git_ver
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--statmap-files', nargs='+',
help="List of coinc files to be redistributed")
parser.add_argument('--censor-ifar-threshold', type=float, default=0.003,
help="If provided, only window out foreground triggers with IFAR (years)"
"above the threshold [default=0.003yr]")
parser.add_argument('--veto-window', type=float, default=0.1,
help="Time around each zerolag trigger to window out [default=.1s]")
parser.add_argument('--cluster-window', type=float)
parser.add_argument('--output-file', help="name of output file")
args = parser.parse_args()
Expand All @@ -28,34 +33,27 @@ f = h5py.File(args.output_file, "w")

logging.info('Copying segments and attributes to %s' % args.output_file)
# Move segments information into the final file - remove some duplication
# in earlier files
for fi in files:
for key in fi['segments']:
if key.startswith('foreground') or key.startswith('background'):
continue
f['segments/%s/end' % key] = fi['segments/%s/end' % key][:]
f['segments/%s/start' % key] = fi['segments/%s/start' % key][:]
if 'segments/foreground_veto' in fi:
f['segments/%s/foreground_veto/end' % key] = \
fi['segments/foreground_veto/end'][:]
f['segments/%s/foreground_veto/start' % key] = \
fi['segments/foreground_veto/start'][:]
for attr_name in fi.attrs:
if key not in f:
f.create_group(key)
f[key].attrs[attr_name] = fi.attrs[attr_name]

logging.info('Combining foreground and foreground excluded segments')
# Set up dictionaries to contain segments from the individual statmap files
# in earlier files. Also set up dictionaries to contain segments from the
# individual statmap files
indiv_segs = segments.segmentlistdict({})

# loop through statmap files and put segments into segmentlistdicts
for fi in files:
key = fi.attrs['ifos'].replace(' ','')
# get analysed segments from individual statmap files
starts = fi['segments/{}/start'.format(key)][:]
ends = fi['segments/{}/end'.format(key)][:]
indiv_segs[key] = pycbc.events.veto.start_end_to_segments(starts, ends)
f['segments/{}/start'.format(key)] = starts
f['segments/{}/end'.format(key)] = ends
if 'segments/foreground_veto' in fi:
f['segments/%s/foreground_veto/end' % key] = \
fi['segments/foreground_veto/end'][:]
f['segments/%s/foreground_veto/start' % key] = \
fi['segments/foreground_veto/start'][:]
for attr_name in fi.attrs:
if key not in f:
f.create_group(key)
f[key].attrs[attr_name] = fi.attrs[attr_name]

logging.info('Combining foreground segments')

# Convert segmentlistdict to a list ('seglists') of segmentlists
# then np.sum(seglists, axis=0) does seglists[0] + seglists[1] + ...
Expand All @@ -67,17 +65,10 @@ all_ifos = np.unique([ifo for fi in files
# output to file
f.attrs['foreground_time'] = abs(foreground_segs)

logging.info('Copying foreground & background common datasets')
keys_to_copy = ['decimation_factor', 'stat']
fg_bg = ['foreground','background', 'background_exc']
for fg_type in fg_bg:
for k in keys_to_copy:
pycbc.io.combine_and_copy(f, files, fg_type + '/' + k)

fg_only_keys_to_copy = ['template_id','timeslide_id', 'ifar', 'ifar_exc']
logging.info('Copying foreground-only datasets')
for k in fg_only_keys_to_copy:
pycbc.io.combine_and_copy(f, files, 'foreground/' + k)
logging.info('Copying foreground datasets')
for k in files[0]['foreground']:
if not k.startswith('fap') and k not in all_ifos:
pycbc.io.combine_and_copy(f, files, 'foreground/' + k)

logging.info('Collating triggers into single structure')

Expand Down Expand Up @@ -108,51 +99,18 @@ for ifo in all_ifos:
f['foreground/{}/time'.format(ifo)] = all_trig_times[ifo]
f['foreground/{}/trigger_id'.format(ifo)] = all_trig_ids[ifo]

logging.info('Getting ifo combination information for each coincidence')
for f_in in files:
key = f_in.attrs['ifos'].replace(' ','')

for fg_type in fg_bg:
ifo_combo_key = fg_type + '/ifo_combination'
fg_comb_repeat = np.array(np.repeat(key.encode('utf8'),
f_in[fg_type + '/stat'].size))
if ifo_combo_key in f:
ifo_comb_fg = f[ifo_combo_key][:]
del f[ifo_combo_key]
ifo_comb_fg = np.concatenate([ifo_comb_fg, fg_comb_repeat])
else:
ifo_comb_fg = fg_comb_repeat

f[ifo_combo_key]=ifo_comb_fg

del fg_comb_repeat, ifo_comb_fg

logging.info('Working available ifo combinations are available for each '
'coincidence')

logging.info('Finding indices of which background events are from which detector combination')

where_combo = {ifo_c:np.where(f['background/ifo_combination'][:]==ifo_c)[0]
for ifo_c in f['segments'] if ifo_c is not 'foreground_veto'}
where_combo_exc = {ifo_c:np.where(f['background_exc/ifo_combination'][:]==ifo_c)[0]
for ifo_c in f['segments'] if ifo_c is not 'foreground_veto'}

logging.info('{} triggers'.format(f['foreground/ifar'].size))
ifar_stat = np.core.records.fromarrays([f['foreground/ifar'][:],
f['foreground/stat'][:]],
names='ifar,stat')
n_triggers = f['foreground/ifar'].size
logging.info('{} triggers'.format(n_triggers))

# all_times is a tuple of trigger time arrays
all_times = (f['foreground/%s/time' % ifo][:] for ifo in all_ifos)

def argmax(v):
    """Return the index of the maximum value in *v*.

    Uses np.argmax (O(n)) rather than the original np.argsort(v)[-1]
    (O(n log n)). On ties this returns the index of the *first* maximum;
    the argsort form's tie order was unspecified (default sort is not
    stable), so no caller could rely on it.
    """
    return np.argmax(v)

# Currently only clustering zerolag, i.e. foreground, so set all timeslide_ids
# to zero
cidx = pycbc.events.cluster_coincs_multiifo(ifar_stat, all_times,
np.zeros(len(ifar_stat)), 0,
args.cluster_window, argmax)
cidx = pycbc.events.cluster_coincs_multiifo(f['foreground/stat'][:], all_times,
np.zeros(n_triggers), 0,
args.cluster_window, argmax=np.argmax)


def filter_dataset(h5file, name, idx):
# Dataset needs to be deleted and remade as it is a different size
Expand All @@ -169,35 +127,49 @@ for key in f['foreground'].keys():
for k in f['foreground/%s' % key].keys():
filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

n_triggers = f['foreground/ifar'].size

times_tuple = (f['foreground/{}/time'.format(ifo)] for ifo in all_ifos)
test_times = np.array([pycbc.events.mean_if_greater_than_zero(tc)[0]
for tc in zip(*times_tuple)])

is_in_combo_time = {}
for key in f['segments']:
is_in_combo_time[key] = np.zeros_like(f['foreground/decimation_factor'][:])
is_in_combo_time[key] = np.zeros(n_triggers)
if key.startswith('foreground') or key.startswith('background'):
continue
end_times = np.array(f['segments/%s/end' % key][:])
start_times = np.array(f['segments/%s/start' % key][:])
idx_within_segment = pycbc.events.indices_within_times(test_times,
start_times,
end_times)
is_in_combo_time[key][idx_within_segment] += np.ones_like(idx_within_segment)
is_in_combo_time[key][idx_within_segment] = np.ones_like(idx_within_segment)
del idx_within_segment


all_indices = np.arange(f['foreground/decimation_factor'].size)
available_combos =[' '.join(sorted([key for key in is_in_combo_time if is_in_combo_time[key][i]])).encode('utf8') for i in all_indices]
del all_indices
f['foreground/available_combinations'] = available_combos
available_combos =[' '.join(sorted([key for key in is_in_combo_time if is_in_combo_time[key][i]])).encode('utf8') for i in np.arange(n_triggers)]

all_combo_types = np.unique(available_combos)
idx = {ct:np.where(np.array(available_combos)==ct)[0]
for ct in all_combo_types}

del available_combos

logging.info('Calculating n_louder background triggers in each type for all foreground events')

fnlouder = {}
fnlouder_exc = {}
for f_in in files:
ifo_combo_key = f_in.attrs['ifos'].replace(' ','')
_, fnlouder[ifo_combo_key] = coinc.calculate_n_louder(
f_in['background/stat'][:],
f['foreground/stat'][:],
f_in['background/decimation_factor'][:]
)
_, fnlouder_exc[ifo_combo_key] = coinc.calculate_n_louder(
f_in['background_exc/stat'][:],
f['foreground/stat'][:],
f_in['background_exc/decimation_factor'][:]
)
logging.info('Recalculating ifar according to summed trigger distributions')

fg_ifar = np.zeros_like(f['foreground/decimation_factor'][:])
Expand All @@ -216,30 +188,26 @@ for ct in all_combo_types:
largest_combination = cts[np.argmax([len(ifo_c) for ifo_c in cts])]
bg_time = f[largest_combination].attrs['background_time']
bg_time_exc = f[largest_combination].attrs['background_time_exc']
inc_bg_list = [where_combo[ifo_c] for ifo_c in cts]
inc_bg = list(itertools.chain(*inc_bg_list))
inc_bg_exc_list = [where_combo_exc[ifo_c] for ifo_c in cts]
inc_bg_exc = list(itertools.chain(*inc_bg_exc_list))
_, fnlouder = coinc.calculate_n_louder(f['background/stat'][:][inc_bg],
f['foreground/stat'][:][idx[ct]],
f['background/decimation_factor'][:][inc_bg])
_, fnlouder_exc = coinc.calculate_n_louder(
f['background_exc/stat'][:][inc_bg_exc],
f['foreground/stat'][:][idx[ct]],
f['background_exc/decimation_factor'][:][inc_bg_exc]
)
fnlouder = np.sum([fnlouder[ifo_combo_key][idx[ct]] for ifo_combo_key in cts])
fnlouder_exc = np.sum([fnlouder_exc[ifo_combo_key][idx[ct]] for ifo_combo_key in cts])
ifar = bg_time / (fnlouder + 1)
ifar_exc = bg_time_exc / (fnlouder_exc + 1)
fg_ifar[idx[ct]] = conv.sec_to_year(ifar)
fg_ifar_exc[idx[ct]] = conv.sec_to_year(ifar_exc)

for bg_type in ['background', 'background_exc']:
for k in ['stat','decimation_factor', 'ifo_combination']:
print(bg_type + '/' + k)
if bg_type + '/' + k in f:
print('deleting')
del f[bg_type + '/' + k]
else: print('not deleting')
f.attrs['foreground_time_exc'] = f.attrs['foreground_time']

# Construct the foreground censor veto from the clustered candidate times
# above the ifar threshold
thr = test_times[fg_ifar > args.censor_ifar_threshold]
vstart = thr - args.veto_window
vend = thr + args.veto_window
vtime = segments.segmentlist([segments.segment(s, e)
for s, e in zip(vstart, vend)])
logging.info('Censoring %.2f seconds', abs(vtime))
f.attrs['foreground_time_exc'] -= abs(vtime)
f['segments/foreground_veto/start'] = vstart
f['segments/foreground_veto/end'] = vend

f['foreground/ifar'][:] = fg_ifar
f['foreground/fap'] = 1 - np.exp(-f.attrs['foreground_time'] / fg_ifar)
Expand Down
13 changes: 8 additions & 5 deletions bin/hdfcoinc/pycbc_multiifo_coinc_statmap
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,17 @@ else:

logging.info("We have %s triggers" % len(all_trigs.stat))
fore_locs = all_trigs.timeslide_id == 0
# Foreground trigger times for ifo.
# Foreground trigger times for ifos
fore_time = {}
for ifo in ifos:
fore_time[ifo] = all_trigs.data['%s/time' % ifo][fore_locs]
# Average times of triggers from ifo1 and ifo2
ave_fore_time = 0
for ifo in ifos:
ave_fore_time += fore_time[ifo] / len(ifos)
# Average times of triggers (note that coincs where not all ifos have triggers
# will have some -1 sentinel values)
fore_time_zip = zip(*fore_time.values())
ave_fore_time = []
for ts in fore_time_zip:
ave_fore_time.append(coinc.mean_if_greater_than_zero(ts)[0])
ave_fore_time = numpy.array(ave_fore_time)

# Remove start and end time around every average foreground trigger time to
# window around.
Expand Down

0 comments on commit 618c6d7

Please sign in to comment.