diff --git a/bin/hdfcoinc/pycbc_coinc_findtrigs b/bin/hdfcoinc/pycbc_coinc_findtrigs
index 00a9e733d3a..467b93881a7 100644
--- a/bin/hdfcoinc/pycbc_coinc_findtrigs
+++ b/bin/hdfcoinc/pycbc_coinc_findtrigs
@@ -84,7 +84,7 @@ class ReadByTemplate(object):
         self.segs = veto.start_end_to_segments(s, e).coalesce()
         for vfile, name in zip(veto_files, segment_name):
             veto_segs = veto.select_segments_by_definer(vfile, ifo=self.ifo,
-                                                    segment_name=name)
+                                                        segment_name=name)
             self.segs = (self.segs - veto_segs).coalesce()
         self.valid = veto.segments_to_start_end(self.segs)
@@ -204,8 +204,15 @@ if args.timeslide_interval is None:
 logging.info('The coincidence window is %3.1f ms' % (time_window * 1000))
 
-data = {'stat':[], 'decimation_factor':[], 'time1':[], 'time2':[],
-        'trigger_id1':[], 'trigger_id2':[], 'timeslide_id':[], 'template_id':[]}
+data = {'stat': [],
+        'decimation_factor': [],
+        'time1': [],
+        'time2': [],
+        'trigger_id1': [],
+        'trigger_id2': [],
+        'timeslide_id': [],
+        'template_id': []
+}
 
 if args.randomize_template_order:
     seed(0)
diff --git a/bin/hdfcoinc/pycbc_multiifo_add_statmap b/bin/hdfcoinc/pycbc_multiifo_add_statmap
index d1345457e17..38e6d19be58 100755
--- a/bin/hdfcoinc/pycbc_multiifo_add_statmap
+++ b/bin/hdfcoinc/pycbc_multiifo_add_statmap
@@ -98,7 +98,8 @@ if args.output_coinc_types:
     fg_coinc_type = np.array([])
     for f_in in files:
         key = get_ifo_string(f_in).replace(' ','')
-        combo_repeat = np.array(np.repeat(key.encode('utf8'), f_in['foreground/fap'].size))
+        combo_repeat = np.array(np.repeat(key.encode('utf8'),
+                                          f_in['foreground/fap'].size))
         fg_coinc_type = np.concatenate([fg_coinc_type, combo_repeat])
 
     f['foreground/ifo_combination'] = fg_coinc_type
@@ -179,7 +180,7 @@ for key in all_ifo_combos:
     is_in_combo_time[key][idx_within_segment] = np.ones_like(idx_within_segment)
     del idx_within_segment
 
-logging.info('Calculating false alarm rate over all coinc types for foreground events')
+logging.info('Calculating FAR over all coinc types for foreground events')
 
 far = {}
 far_exc = {}
diff --git a/bin/hdfcoinc/pycbc_multiifo_coinc_findtrigs b/bin/hdfcoinc/pycbc_multiifo_coinc_findtrigs
index fb2074faa4c..afcb0951b5c 100644
--- a/bin/hdfcoinc/pycbc_multiifo_coinc_findtrigs
+++ b/bin/hdfcoinc/pycbc_multiifo_coinc_findtrigs
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 import h5py, argparse, logging, numpy, numpy.random
+from ligo.segments import infinity
 from pycbc.events import veto, coinc, stat
 import pycbc.version
 from numpy.random import seed, shuffle
@@ -175,27 +176,39 @@ num_templates = len(h5py.File(args.template_bank, "r")['template_hash'])
 tmin, tmax = parse_template_range(num_templates, args.template_fraction_range)
 logging.info('Analyzing template %s - %s' % (tmin, tmax-1))
 
-# Create dictionary for trigger files indexed on ifo name
-trigs = {}
+class MultiifoTrigs(object):
+    """Store trigger info in parallel with ifo name and shift vector"""
+    def __init__(self):
+        self.ifos = []
+        self.to_shift = []
+        self.singles = []
+
+trigs = MultiifoTrigs()
 for i in range(len(args.trigger_files)):
     logging.info('Opening trigger file %s: %s' % (i,args.trigger_files[i]))
     reader = ReadByTemplate(args.trigger_files[i],
-                            args.template_bank,
-                            args.segment_name,
-                            args.veto_files)
-    trigs[reader.ifo] = reader
+                           args.template_bank,
+                           args.segment_name,
+                           args.veto_files)
+    ifo = reader.ifo
+    trigs.ifos.append(ifo)
+    # time shift is subtracted from pivot ifo time
+    trigs.to_shift.append(-1 if ifo == args.pivot_ifo else 0)
+    logging.info('Applying time shift multiple %i to ifo %s' %
+                 (trigs.to_shift[-1], trigs.ifos[-1]))
+    trigs.singles.append(reader)
 
 # Coinc_segs contains only segments where all ifos are analyzed
-coinc_segs = trigs[args.pivot_ifo].segs
-for ifo in trigs:
-    coinc_segs = (coinc_segs & trigs[ifo].segs).coalesce()
-
-for ifo in trigs:
-    trigs[ifo].segs = coinc_segs
-    trigs[ifo].valid = veto.segments_to_start_end(trigs[ifo].segs)
+coinc_segs = veto.start_end_to_segments([-infinity()], [infinity()])
+for i, sngl in zip(trigs.ifos, trigs.singles):
+    coinc_segs = (coinc_segs & sngl.segs)
+for sngl in trigs.singles:
+    sngl.segs = coinc_segs
+    sngl.valid = veto.segments_to_start_end(sngl.segs)
 
 # Stat class instance to calculate the coinc ranking statistic
-rank_method = stat.get_statistic(args.ranking_statistic)(args.statistic_files)
+rank_method = stat.get_statistic(args.ranking_statistic)(args.statistic_files,
+                                                         ifos=trigs.ifos)
 
 # Sanity check, time slide interval should be larger than twice the
 # Earth crossing time, which is approximately 0.085 seconds.
@@ -223,21 +236,25 @@ data = {'stat': [], 'decimation_factor': [], 'timeslide_id': [], 'template_id': []}
 
 for tnum in template_ids:
     tids = {}
-    for ifo in trigs:
-        tids[ifo] = trigs[ifo].set_template(tnum)
-
-    for ifo in tids:
-        if not len(tids[ifo]): # no triggers in this template
-            continue
-
-    times_full = {ifo:trigs[ifo]['end_time'] for ifo in trigs}
-    logging.info('Trigs for template %s, '% (tnum))
-    for ifo in times_full:
-        logging.info('%s:%s' % (ifo, len(times_full[ifo])))
-
-    logging.info('Calculating Single Detector Statistic')
-    sds_full = {ifo: rank_method.single(trigs[ifo]) for ifo in trigs}
-
+    for i, sngl in zip(trigs.ifos, trigs.singles):
+        # restrict trigger information to current template
+        tids[i] = sngl.set_template(tnum)
+
+    mintrigs = min([len(ti) for ti in tids.values()])
+    if mintrigs == 0:
+        logging.info('No triggers in at least one ifo for template %i, '
+                     'skipping' % tnum)
+        continue
+
+    times_full = {}
+    sds_full = {}
+    logging.info('Obtaining trigs for template %i ..'
+                 % (tnum))
+    for i, sngl in zip(trigs.ifos, trigs.singles):
+        logging.info('%s:%s' % (i, len(tids[i])))
+        times_full[i] = sngl['end_time']
+        # get single-detector statistic
+        sds_full[i] = rank_method.single(sngl)
+
     # Loop over the single triggers and calculate the coincs they can form
     start0 = 0
     while start0 < len(sds_full[args.pivot_ifo]):
@@ -264,16 +281,15 @@ for tnum in template_ids:
                                           args.coinc_threshold,
                                           args.pivot_ifo,
                                           args.fixed_ifo)
-        logging.info('Coincident Trigs: %s' % (len(ids[args.pivot_ifo])))
-        logging.info('Calculating Multi-Detector Combined Statistic')
-        sdswithids = {}
-        for ifo in sds:
-            sdswithids[ifo] = sds[ifo][ids[ifo]]
-        cstat = rank_method.coinc_multiifo(sdswithids,
-                                           slide,
-                                           args.timeslide_interval,
-                                           time_addition=\
-                                           args.coinc_threshold)
+        logging.info('Coincident trigs: %s' % (len(ids[args.pivot_ifo])))
+
+        logging.info('Calculating multi-detector combined statistic')
+        # list in ifo order of remaining trigger data
+        single_info = [(i, sds[i][ids[i]]) for i in trigs.ifos]
+        cstat = rank_method.coinc_multiifo(
+            single_info, slide, args.timeslide_interval,
+            to_shift=trigs.to_shift,
+            time_addition=args.coinc_threshold)
 
         # index values of the zerolag triggers
         fi = numpy.where(slide == 0)[0]
@@ -361,24 +377,21 @@ if len(data['stat']) > 0:
                          shuffle=True)
 
 # Store coinc segments keyed by detector combination
-key = ''.join(sorted(trigs))
-f['segments/%s/start' % key], f['segments/%s/end' % key] = trigs[args.pivot_ifo].valid
+key = ''.join(sorted(trigs.ifos))
+f['segments/%s/start' % key], f['segments/%s/end' % key] = trigs.singles[0].valid
 
 f.attrs['timeslide_interval'] = args.timeslide_interval
 f.attrs['num_of_ifos'] = len(args.trigger_files)
 f.attrs['pivot'] = args.pivot_ifo
 f.attrs['fixed'] = args.fixed_ifo
-for ifo in trigs:
-    f.attrs['%s_foreground_time' % ifo] = abs(trigs[ifo].segs)
+for i, sngl in zip(trigs.ifos, trigs.singles):
+    f.attrs['%s_foreground_time' % i] = abs(sngl.segs)
 f.attrs['coinc_time'] = abs(coinc_segs)
-f.attrs['ifos'] = ' '.join(sorted(trigs))
+f.attrs['ifos'] = ' '.join(sorted(trigs.ifos))
 
 # What does this code actually calculate?
 if args.timeslide_interval:
-    maxtrigs = abs(trigs[args.pivot_ifo].segs)
-    for ifo in trigs:
-        if abs(trigs[ifo].segs) > maxtrigs:
-            maxtrigs = abs(trigs[ifo].segs)
+    maxtrigs = max([abs(sngl.segs) for sngl in trigs.singles])
     nslides = int(maxtrigs / args.timeslide_interval)
 else:
     nslides = 0
diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py
index aaec9433502..9aabf0a9d72 100644
--- a/pycbc/events/stat.py
+++ b/pycbc/events/stat.py
@@ -21,18 +21,20 @@
 #
 # =============================================================================
 #
-""" This module contains functions for calculating coincident ranking
-statistic values """
+"""
+This module contains functions for calculating coincident ranking statistic
+values.
+"""
+import logging
 import numpy
 from . import ranking
 from . import coinc_rate
 
 
 class Stat(object):
+    """Base class which should be extended to provide a coincident statistic"""
 
-    """ Base class which should be extended to provide a coincident statistic"""
-    def __init__(self, files):
+    def __init__(self, files=None, ifos=None):
         """Create a statistic class instance
 
         Parameters
         ----------
@@ -42,12 +44,20 @@ def __init__(self, files):
             construct the coincident statistics. The files must have a 'stat'
             attribute which is used to associate them with the appropriate
             statistic class.
+
+        ifos: list of detector names, optional
         """
         import h5py
+
+        self.files = {}
+        files = files or []
         for filename in files:
             f = h5py.File(filename, 'r')
             stat = (f.attrs['stat']).decode()
+            if stat in self.files:
+                raise RuntimeError("We already have one file with stat attr ="
+                                   " %s. Can't provide more than one!" % stat)
+            logging.info("Found file %s for stat %s", filename, stat)
             self.files[stat] = f
 
         # Provide the dtype of the single detector method's output
@@ -55,10 +65,11 @@ def __init__(self, files):
         # a buffer of such values.
         self.single_dtype = numpy.float32
 
+        self.ifos = ifos or []
 
-class NewSNRStatistic(Stat):
-
-    """ Calculate the NewSNR coincident detection statistic """
+class NewSNRStatistic(Stat):
+    """Calculate the NewSNR coincident detection statistic"""
 
     def single(self, trigs):
         """Calculate the single detector statistic, here equal to newsnr
@@ -94,26 +105,30 @@ def coinc(self, s0, s1, slide, step): # pylint:disable=unused-argument
         """
         return (s0 ** 2. + s1 ** 2.) ** 0.5
 
-    def coinc_multiifo(self, s, slide, step,
+    def coinc_multiifo(self, s, slide, step, to_shift,
                        **kwargs): # pylint:disable=unused-argument
         """Calculate the coincident detection statistic.
+
         Parameters
         ----------
-        s: dictionary keyed by ifo of single detector ranking
-           statistics
+        s: list
+            List of (ifo, single detector statistic) tuples
         slide: (unused in this statistic)
         step: (unused in this statistic)
+        to_shift: list
+            List of integers indicating what multiples of the time shift will
+            be applied (unused in this statistic)
+
         Returns
         -------
         numpy.ndarray
             Array of coincident ranking statistic values
         """
-        return sum(x ** 2. for x in s.values()) ** 0.5
+        return sum(sngl[1] ** 2. for sngl in s) ** 0.5
 
 
 class NewSNRSGStatistic(NewSNRStatistic):
-
-    """ Calculate the NewSNRSG coincident detection statistic """
+    """Calculate the NewSNRSG coincident detection statistic"""
 
     def single(self, trigs):
         """Calculate the single detector statistic, here equal to newsnr_sgveto
@@ -132,12 +147,11 @@ def single(self, trigs):
 
 
 class NewSNRSGPSDStatistic(NewSNRSGStatistic):
-
-    """ Calculate the NewSNRSGPSD coincident detection statistic """
+    """Calculate the NewSNRSGPSD coincident detection statistic"""
 
     def single(self, trigs):
         """Calculate the single detector statistic, here equal to newsnr
-        combined with sgveto and psdvar statistic 
+        combined with sgveto and psdvar statistic
 
         Parameters
         ----------
@@ -152,7 +166,6 @@ def single(self, trigs):
 
 
 class NetworkSNRStatistic(NewSNRStatistic):
-
     """Same as the NewSNR statistic, but just sum of squares of SNRs"""
 
     def single(self, trigs):
@@ -160,7 +173,6 @@ def single(self, trigs):
 
 
 class NewSNRCutStatistic(NewSNRStatistic):
-
     """Same as the NewSNR statistic, but demonstrates a cut of the triggers"""
 
     def single(self, trigs):
@@ -198,52 +210,82 @@ def coinc(self, s0, s1, slide, step): # pylint:disable=unused-argument
         cstat: numpy.ndarray
             Array of coincident ranking statistic values
         """
-        cstat = (s0**2. + s1**2.) ** 0.5
-        cstat[s0==-1] = 0
-        cstat[s1==-1] = 0
+        cstat = (s0 ** 2. + s1 ** 2.) ** 0.5
+        cstat[s0 == -1] = 0
+        cstat[s1 == -1] = 0
         return cstat
 
 
 class PhaseTDStatistic(NewSNRStatistic):
-
     """Statistic that re-weights combined newsnr using coinc parameters.
 
     The weighting is based on the PDF of time delays, phase differences and
     amplitude ratios between triggers in different ifos.
""" - def __init__(self, files): - NewSNRStatistic.__init__(self, files) - self.hist = self.files['phasetd_newsnr']['map'][:] - # Normalize so that peak has no effect on newsnr - self.hist = self.hist / float(self.hist.max()) - self.hist = numpy.log(self.hist) - - # Bin boundaries are stored in the hdf file - self.tbins = self.files['phasetd_newsnr']['tbins'][:] - self.pbins = self.files['phasetd_newsnr']['pbins'][:] - self.sbins = self.files['phasetd_newsnr']['sbins'][:] - self.rbins = self.files['phasetd_newsnr']['rbins'][:] + def __init__(self, files, ifos=None): + NewSNRStatistic.__init__(self, files, ifos=ifos) self.single_dtype = [('snglstat', numpy.float32), - ('coa_phase', numpy.float32), - ('end_time', numpy.float64), - ('sigmasq', numpy.float32), - ('snr', numpy.float32)] + ('coa_phase', numpy.float32), + ('end_time', numpy.float64), + ('sigmasq', numpy.float32), + ('snr', numpy.float32) + ] # Assign attribute so that it can be replaced with other functions self.get_newsnr = ranking.get_newsnr + self.hist = None + self.bins = {} + self.hist_ifos = [] + + def get_hist(self, ifos=None, norm='max'): + """Read in a signal density file for the ifo combination""" + + # default name for old 2-ifo workflow + if 'phasetd_newsnr' in self.files: + histfile = self.files['phasetd_newsnr'] + else: + ifos = ifos or self.ifos # if None, use the instance attribute + if len(ifos) != 2: + raise RuntimeError("Need exactly 2 ifos for the p/t/a " + "statistic! Ifos given were " + ifos) + matching = [k for k in self.files.keys() if \ + 'phasetd' in k and (ifos[0] in k and ifos[1] in k)] + if len(matching) == 1: + histfile = self.files[matching[0]] + else: + raise RuntimeError( + "%i statistic files had an attribute matching phasetd*%s%s !" + "Should be exactly 1" % (len(matching), ifos[0], ifos[1])) + logging.info("Using signal histogram %s for ifos %s", matching, + ifos) + + self.hist = histfile['map'][:] + self.hist_ifos = ifos + + if norm == 'max': + # Normalize so that peak of hist is equal to unity + self.hist = self.hist / float(self.hist.max()) + self.hist = numpy.log(self.hist) + else: + raise NotImplementedError("Sorry, we have no other normalizations") + + # Bin boundaries are stored in the hdf file + self.bins['dt'] = histfile['tbins'][:] + self.bins['dphi'] = histfile['pbins'][:] + self.bins['snr'] = histfile['sbins'][:] + self.bins['sigma_ratio'] = histfile['rbins'][:] + def single(self, trigs): - """ - Calculate the single detector statistic and assemble other parameters + """Calculate the single detector statistic & assemble other parameters Parameters ---------- - trigs: dict of numpy.ndarrays, h5py group (or similar dict-like object) - Dictionary-like object holding single detector trigger information. - 'chisq_dof', 'snr', 'chisq', 'coa_phase', 'end_time', and 'sigmasq' - are required keys. + trigs: dict of numpy.ndarrays, h5py group or similar dict-like object + Object holding single detector trigger information. 'snr', 'chisq', + 'chisq_dof', 'coa_phase', 'end_time', and 'sigmasq' are required keys. Returns ------- @@ -259,15 +301,10 @@ def single(self, trigs): singles['snr'] = trigs['snr'][:] return numpy.array(singles, ndmin=1) - def logsignalrate(self, s0, s1, slide, step): - """Calculate the normalized log rate density of signals via lookup""" - td = numpy.array(s0['end_time'] - s1['end_time'] - slide*step, ndmin=1) - pd = numpy.array((s0['coa_phase'] - s1['coa_phase']) % \ - (2. 
-        rd = numpy.array((s0['sigmasq'] / s1['sigmasq']) ** 0.5, ndmin=1)
-        sn0 = numpy.array(s0['snr'], ndmin=1)
-        sn1 = numpy.array(s1['snr'], ndmin=1)
+    def signal_hist(self, td, pd, sn0, sn1, rd):
+        """Look up the log signal rate density in the binned histogram"""
+        assert self.hist is not None
+
         # enforce that sigma ratio is < 1 by swapping values
         snr0 = sn0 * 1
         snr1 = sn1 * 1
@@ -276,30 +313,95 @@ def logsignalrate(self, s0, s1, slide, step):
         rd[rd > 1] = 1. / rd[rd > 1]
 
         # Find which bin each coinc falls into
-        tv = numpy.searchsorted(self.tbins, td) - 1
-        pv = numpy.searchsorted(self.pbins, pd) - 1
-        s0v = numpy.searchsorted(self.sbins, snr0) - 1
-        s1v = numpy.searchsorted(self.sbins, snr1) - 1
-        rv = numpy.searchsorted(self.rbins, rd) - 1
+        tv = numpy.searchsorted(self.bins['dt'], td) - 1
+        pv = numpy.searchsorted(self.bins['dphi'], pd) - 1
+        s0v = numpy.searchsorted(self.bins['snr'], snr0) - 1
+        s1v = numpy.searchsorted(self.bins['snr'], snr1) - 1
+        rv = numpy.searchsorted(self.bins['sigma_ratio'], rd) - 1
 
-        # Enforce that points fits into the bin boundaries: if a point lies
+        # Enforce that points fit into the bin boundaries: if a point lies
         # outside the boundaries it is pushed back to the nearest bin.
-        tv[tv < 0] = 0
-        tv[tv >= len(self.tbins) - 1] = len(self.tbins) - 2
-        pv[pv < 0] = 0
-        pv[pv >= len(self.pbins) - 1] = len(self.pbins) - 2
-        s0v[s0v < 0] = 0
-        s0v[s0v >= len(self.sbins) - 1] = len(self.sbins) - 2
-        s1v[s1v < 0] = 0
-        s1v[s1v >= len(self.sbins) - 1] = len(self.sbins) - 2
-        rv[rv < 0] = 0
-        rv[rv >= len(self.rbins) - 1] = len(self.rbins) - 2
+        for binnum, axis in zip([tv, pv, rv, s0v, s1v],
+                                ['dt', 'dphi', 'sigma_ratio', 'snr', 'snr']):
+            binend = len(self.bins[axis])
+            binnum[binnum < 0] = 0
+            binnum[binnum >= binend - 1] = binend - 2
 
         return self.hist[tv, pv, s0v, s1v, rv]
 
-    def coinc(self, s0, s1, slide, step):
+    def slide_dt(self, singles, shift, slide_vec):
+        # Apply time shifts in the multiples specified by slide_vec
+        # and return the resulting time difference
+        assert len(singles) == 2
+        assert len(slide_vec) == 2
+        dt = singles[0]['end_time'] + shift * slide_vec[0] - \
+             (singles[1]['end_time'] + shift * slide_vec[1])
+        return dt
+
+    def logsignalrate(self, s0, s1, shift):
+        """Calculate the normalized log rate density of signals via lookup"""
+
+        # does not require ifos to be specified, only 1 p/t/a file
+        if self.hist is None:
+            self.get_hist()
+        else:
+            logging.info("Using pre-set signal histogram")
+
+        # for 2-ifo pipeline, add time shift to 2nd ifo ('s1')
+        slidevec = [0, 1]
+        td = numpy.array(self.slide_dt([s0, s1], shift, slidevec),
+                         ndmin=1)
+        if numpy.any(td > 1.):
+            raise RuntimeError(
+                "Time difference bigger than 1 second after applying any time "
+                "shifts! This should not happen")
+        pd = numpy.array((s0['coa_phase'] - s1['coa_phase']) % \
+                         (2. * numpy.pi), ndmin=1)
+        sn0 = numpy.array(s0['snr'], ndmin=1)
+        sn1 = numpy.array(s1['snr'], ndmin=1)
+        rd = numpy.array((s0['sigmasq'] / s1['sigmasq']) ** 0.5, ndmin=1)
+
+        return self.signal_hist(td, pd, sn0, sn1, rd)
+
+    def logsignalrate_multiifo(self, s, shift, to_shift):
+        """Calculate the normalized log rate density of signals via lookup
+
+        Parameters
+        ----------
+        s: list, length 2
+            List of sets of single-ifo trigger parameter values
+        shift: numpy.ndarray
+            Array of floats giving the time shifts to be applied with
+            multiples given by to_shift
+        to_shift: list, length 2
+            List of time shift multiples
+
+        Returns
+        -------
+        numpy.ndarray
+            Array of log signal rate densities for the given triggers
         """
-        Calculate the coincident detection statistic.
+        assert len(s) == 2
+        assert len(to_shift) == 2
+
+        # At present for triples use the H/L signal histogram
+        hist_ifos = self.ifos if len(self.ifos) == 2 else ['H1', 'L1']
+        if self.hist is None:
+            self.get_hist(hist_ifos)
+        else:
+            assert self.hist_ifos == hist_ifos
+            logging.info("Using pre-set signal histogram for %s",
+                         self.hist_ifos)
+
+        td = self.slide_dt(s, shift, to_shift)
+        if numpy.any(td > 1.):
+            raise RuntimeError(
+                "Time difference bigger than 1 second after applying any time "
+                "shifts! This should not happen")
+        pd = numpy.array((s[0]['coa_phase'] - s[1]['coa_phase']) % \
+                         (2. * numpy.pi), ndmin=1)
+        sn0 = numpy.array(s[0]['snr'], ndmin=1)
+        sn1 = numpy.array(s[1]['snr'], ndmin=1)
+        rd = numpy.array((s[0]['sigmasq'] / s[1]['sigmasq']) ** 0.5, ndmin=1)
+
+        return self.signal_hist(td, pd, sn0, sn1, rd)
+
+    def coinc(self, s0, s1, slide, step):
+        """Calculate the coincident detection statistic.
 
         Parameters
         ----------
@@ -318,8 +420,8 @@ def coinc(self, s0, s1, slide, step):
         coinc_stat: numpy.ndarray
             An array of the coincident ranking statistic values
         """
-        rstat = s0['snglstat']**2. + s1['snglstat']**2.
-        cstat = rstat + 2. * self.logsignalrate(s0, s1, slide, step)
+        rstat = s0['snglstat'] ** 2. + s1['snglstat'] ** 2.
+        cstat = rstat + 2. * self.logsignalrate(s0, s1, slide * step)
         cstat[cstat < 0] = 0
         return cstat ** 0.5
 
@@ -327,36 +429,35 @@ def coinc(self, s0, s1, slide, step):
 
 class PhaseTDSGStatistic(PhaseTDStatistic):
     """PhaseTDStatistic but with sine-Gaussian veto added to the
-    single detector ranking
+    single-detector ranking
     """
-
-    def __init__(self, files):
-        PhaseTDStatistic.__init__(self, files)
+    def __init__(self, files, ifos=None):
+        PhaseTDStatistic.__init__(self, files, ifos=ifos)
         self.get_newsnr = ranking.get_newsnr_sgveto
 
 
 class ExpFitStatistic(NewSNRStatistic):
-
     """Detection statistic using an exponential falloff noise model.
 
     Statistic approximates the negative log noise coinc rate density per
     template over single-ifo newsnr values.
""" - def __init__(self, files): + def __init__(self, files, ifos=None): if not len(files): raise RuntimeError("Can't find any statistic files !") - NewSNRStatistic.__init__(self, files) + NewSNRStatistic.__init__(self, files, ifos=ifos) + # the stat file attributes are hard-coded as '%{ifo}-fit_coeffs' parsed_attrs = [f.split('-') for f in self.files.keys()] - self.ifos = [at[0] for at in parsed_attrs if - (len(at) == 2 and at[1] == 'fit_coeffs')] - if not len(self.ifos): + self.bg_ifos = [at[0] for at in parsed_attrs if + (len(at) == 2 and at[1] == 'fit_coeffs')] + if not len(self.bg_ifos): raise RuntimeError("None of the statistic files has the required " "attribute called {ifo}-fit_coeffs !") self.fits_by_tid = {} self.alphamax = {} - for i in self.ifos: + for i in self.bg_ifos: self.fits_by_tid[i] = self.assign_fits(i) self.get_ref_vals(i) @@ -370,22 +471,25 @@ def assign_fits(self, ifo): # the template_ids and fit coeffs are stored in an arbitrary order # create new arrays in template_id order for easier recall tid_sort = numpy.argsort(template_id) - return {'alpha':alphas[tid_sort], 'rate':rates[tid_sort], - 'thresh':coeff_file.attrs['stat_threshold']} + return {'alpha': alphas[tid_sort], + 'rate': rates[tid_sort], + 'thresh': coeff_file.attrs['stat_threshold'] + } def get_ref_vals(self, ifo): self.alphamax[ifo] = self.fits_by_tid[ifo]['alpha'].max() def find_fits(self, trigs): """Get fit coeffs for a specific ifo and template id(s)""" + try: tnum = trigs.template_num # exists if accessed via coinc_findtrigs ifo = trigs.ifo except AttributeError: tnum = trigs['template_id'] # exists for SingleDetTriggers - # Should only be one ifo fit file provided - assert len(self.ifos) == 1 - ifo = self.ifos[0] + # Should be exactly one ifo fit file provided + assert len(self.bg_ifos) == 1 + ifo = self.bg_ifos[0] # fits_by_tid is a dictionary of dictionaries of arrays # indexed by ifo / coefficient name / template_id alphai = self.fits_by_tid[ifo]['alpha'][tnum] @@ -394,8 +498,7 @@ def find_fits(self, trigs): return alphai, ratei, thresh def lognoiserate(self, trigs): - """ - Calculate the log noise rate density over single-ifo newsnr + """Calculate the log noise rate density over single-ifo newsnr Read in single trigger information, make the newsnr statistic and rescale by the fitted coefficients alpha and rate @@ -403,7 +506,7 @@ def lognoiserate(self, trigs): alphai, ratei, thresh = self.find_fits(trigs) newsnr = self.get_newsnr(trigs) # alphai is constant of proportionality between single-ifo newsnr and - # negative log noise likelihood in given template + # negative log noise likelihood in given template # ratei is rate of trigs in given template compared to average # thresh is stat threshold used in given ifo lognoisel = - alphai * (newsnr - thresh) + numpy.log(alphai) + \ @@ -412,15 +515,17 @@ def lognoiserate(self, trigs): def single(self, trigs): """Single-detector statistic, here just equal to the log noise rate""" + return self.lognoiserate(trigs) def coinc(self, s0, s1, slide, step): # pylint:disable=unused-argument """Calculate the final coinc ranking statistic""" + # Approximate log likelihood ratio by summing single-ifo negative # log noise likelihoods loglr = - s0 - s1 # add squares of threshold stat values via idealized Gaussian formula - threshes = [self.fits_by_tid[i]['thresh'] for i in self.ifos] + threshes = [self.fits_by_tid[i]['thresh'] for i in self.bg_ifos] loglr += sum([t**2. / 2. 
         # convert back to a coinc-SNR-like statistic
         # via log likelihood ratio \propto rho_c^2 / 2
@@ -428,21 +533,20 @@ def coinc(self, s0, s1, slide, step): # pylint:disable=unused-argument
 
 
 class ExpFitCombinedSNR(ExpFitStatistic):
-
     """Reworking of ExpFitStatistic designed to resemble network SNR
 
     Use a monotonic function of the negative log noise rate density which
     approximates combined (new)snr for coincs with similar newsnr in each ifo
     """
-    def __init__(self, files):
-        ExpFitStatistic.__init__(self, files)
+    def __init__(self, files, ifos=None):
+        ExpFitStatistic.__init__(self, files, ifos=ifos)
         # for low-mass templates the exponential slope alpha \approx 6
         self.alpharef = 6.
 
     def use_alphamax(self):
         # take reference slope as the harmonic mean of individual ifo slopes
-        inv_alphas = [1. / self.alphamax[i] for i in self.ifos]
+        inv_alphas = [1. / self.alphamax[i] for i in self.bg_ifos]
         self.alpharef = 1. / (sum(inv_alphas) / len(inv_alphas))
 
     def single(self, trigs):
@@ -458,45 +562,43 @@ def coinc(self, s0, s1, slide, step): # pylint:disable=unused-argument
         # scale by 1/sqrt(2) to resemble network SNR
         return (s0 + s1) / 2.**0.5
 
-    def coinc_multiifo(self, s, slide,
-                       step, **kwargs): # pylint:disable=unused-argument
+    def coinc_multiifo(self, s, slide, step, to_shift,
+                       **kwargs): # pylint:disable=unused-argument
         # scale by 1/sqrt(number of ifos) to resemble network SNR
-        return sum(x for x in s.values()) / len(s)**0.5
+        return sum(sngl[1] for sngl in s) / len(s)**0.5
 
 
 class ExpFitSGCombinedSNR(ExpFitCombinedSNR):
+    """ExpFitCombinedSNR but with sine-Gaussian veto added to the single
 
-    """ExpFitCombinedSNR but with sine-Gaussian veto added to the
-
-    single detector ranking
+    detector ranking
     """
-    def __init__(self, files):
-        ExpFitCombinedSNR.__init__(self, files)
+    def __init__(self, files, ifos=None):
+        ExpFitCombinedSNR.__init__(self, files, ifos=ifos)
         self.get_newsnr = ranking.get_newsnr_sgveto
 
 
 class ExpFitSGPSDCombinedSNR(ExpFitCombinedSNR):
-
     """ExpFitCombinedSNR but with sine-Gaussian veto and PSD variation added
     to the single detector ranking
     """
-    def __init__(self, files):
-        ExpFitCombinedSNR.__init__(self, files)
+    def __init__(self, files, ifos=None):
+        ExpFitCombinedSNR.__init__(self, files, ifos=ifos)
         self.get_newsnr = ranking.get_newsnr_sgveto_psdvar
 
 
 class PhaseTDExpFitStatistic(PhaseTDStatistic, ExpFitCombinedSNR):
-
     """Statistic combining exponential noise model with signal histogram PDF"""
 
-    def __init__(self, files):
+    # default is 2-ifo operation with exactly 1 'phasetd' file
+    def __init__(self, files, ifos=None):
         # read in both foreground PDF and background fit info
-        ExpFitCombinedSNR.__init__(self, files)
+        ExpFitCombinedSNR.__init__(self, files, ifos=ifos)
         # need the self.single_dtype value from PhaseTDStatistic
-        PhaseTDStatistic.__init__(self, files)
+        PhaseTDStatistic.__init__(self, files, ifos=ifos)
 
     def single(self, trigs):
         # same single-ifo stat as ExpFitCombinedSNR
@@ -511,7 +613,7 @@ def single(self, trigs):
 
     def coinc(self, s0, s1, slide, step):
         # logsignalrate function inherited from PhaseTDStatistic
-        logr_s = self.logsignalrate(s0, s1, slide, step)
+        logr_s = self.logsignalrate(s0, s1, slide * step)
         # rescale by ExpFitCombinedSNR reference slope as for sngl stat
         cstat = s0['snglstat'] + s1['snglstat'] + logr_s / self.alpharef
         # cut off underflowing and very small values
@@ -521,34 +623,33 @@ def coinc(self, s0, s1, slide, step):
 
 
 class PhaseTDExpFitSGStatistic(PhaseTDExpFitStatistic):
-
     """Statistic combining exponential noise model with signal histogram PDF
-    and adding the sine-Gaussian veto to the single detector ranking
+
+    adding the sine-Gaussian veto to the single detector ranking
     """
-    def __init__(self, files):
-        PhaseTDExpFitStatistic.__init__(self, files)
+    def __init__(self, files, ifos=None):
+        PhaseTDExpFitStatistic.__init__(self, files, ifos=ifos)
         self.get_newsnr = ranking.get_newsnr_sgveto
 
 
 class PhaseTDExpFitSGPSDStatistic(PhaseTDExpFitSGStatistic):
-
     """Statistic combining exponential noise model with signal histogram PDF
-    and adding the sine-Gaussian veto and PSD variation statistic to the
-    single detector ranking
+
+    adding the sine-Gaussian veto and PSD variation statistic to the
+    single detector ranking
     """
-    def __init__(self, files):
-        PhaseTDExpFitSGStatistic.__init__(self, files)
+    def __init__(self, files, ifos=None):
+        PhaseTDExpFitSGStatistic.__init__(self, files, ifos=ifos)
         self.get_newsnr = ranking.get_newsnr_sgveto_psdvar
 
 
 class MaxContTradNewSNRStatistic(NewSNRStatistic):
-
     """Combination of NewSNR with the power chisq and auto chisq"""
 
     def single(self, trigs):
-        """ Calculate the single detector statistic.
+        """Calculate the single detector statistic.
 
         Parameters
         ----------
@@ -570,64 +671,68 @@ def single(self, trigs):
 
 
 class ExpFitSGBgRateStatistic(ExpFitStatistic):
-
     """Detection statistic using an exponential falloff noise model.
+
     Statistic calculates the log noise coinc rate for each template over
     single-ifo newsnr values.
     """
 
-    def __init__(self, files, benchmark_lograte=-14.6):
+    def __init__(self, files, ifos=None, benchmark_lograte=-14.6):
         # benchmark_lograte is log of a representative noise trigger rate
         # This comes from H1L1 (O2) and is 4.5e-7 Hz
-        super(ExpFitSGBgRateStatistic, self).__init__(files)
+        super(ExpFitSGBgRateStatistic, self).__init__(files, ifos=ifos)
         self.benchmark_lograte = benchmark_lograte
         self.get_newsnr = ranking.get_newsnr_sgveto
-        # Reassign the rate as it is now number per time rather than an
-        # arbitrarily normalised number
-        for ifo in self.ifos:
+
+        # Reassign the rate to be number per time rather than an arbitrarily
+        # normalised number
+        for ifo in self.bg_ifos:
             self.reassign_rate(ifo)
 
     def reassign_rate(self, ifo):
         coeff_file = self.files[ifo+'-fit_coeffs']
         template_id = coeff_file['template_id'][:]
-        # the template_ids and fit coeffs are stored in an arbitrary order
-        # create new arrays in template_id order for easier recall
+        # create arrays in template_id order for easier recall
         tid_sort = numpy.argsort(template_id)
         self.fits_by_tid[ifo]['rate'] = \
             coeff_file['count_above_thresh'][:][tid_sort] / \
             float(coeff_file.attrs['analysis_time'])
 
-    def coinc_multiifo(self, s, slide,
-                       step, **kwargs): # pylint:disable=unused-argument
+    def coinc_multiifo(self, s, slide, step, to_shift,
+                       **kwargs): # pylint:disable=unused-argument
         # ranking statistic is -ln(expected rate density of noise triggers)
         # plus normalization constant
+        sngl_dict = {sngl[0]: sngl[1] for sngl in s}
         ln_noise_rate = coinc_rate.combination_noise_lograte(
-            s, kwargs['time_addition'])
+            sngl_dict, kwargs['time_addition'])
         loglr = - ln_noise_rate + self.benchmark_lograte
         return loglr
 
 
 class ExpFitSGFgBgRateStatistic(PhaseTDStatistic, ExpFitSGBgRateStatistic):
 
-    def __init__(self, files):
-        # read in background fit info and store it, also use newsnr_sgveto
-        ExpFitSGBgRateStatistic.__init__(self, files)
-        # Use PhaseTD statistic single.dtype
-        PhaseTDStatistic.__init__(self, files)
-        for ifo in self.ifos:
-            self.assign_median_sigma(ifo)
+
+    def __init__(self, files, ifos=None):
+        # read in background fit info and store it
+        ExpFitSGBgRateStatistic.__init__(self, files, ifos=ifos)
+        # if ifos not already set, determine via background fit info
+        self.ifos = self.ifos or self.bg_ifos
+        # PhaseTD statistic single_dtype plus network sensitivity benchmark
+        PhaseTDStatistic.__init__(self, files, ifos=self.ifos)
         self.single_dtype.append(('benchmark_logvol', numpy.float32))
+        self.get_newsnr = ranking.get_newsnr_sgveto
+
+        for ifo in self.bg_ifos:
+            self.assign_median_sigma(ifo)
 
         # benchmark_logvol is a benchmark sensitivity array over template id
         hl_net_med_sigma = numpy.amin([self.fits_by_tid[ifo]['median_sigma']
                                        for ifo in ['H1', 'L1']], axis=0)
         self.benchmark_logvol = 3.0 * numpy.log(hl_net_med_sigma)
-        self.get_newsnr = ranking.get_newsnr_sgveto
 
     def assign_median_sigma(self, ifo):
-        coeff_file = self.files[ifo+'-fit_coeffs']
+        coeff_file = self.files[ifo + '-fit_coeffs']
         template_id = coeff_file['template_id'][:]
         tid_sort = numpy.argsort(template_id)
-
         self.fits_by_tid[ifo]['median_sigma'] = \
             coeff_file['median_sigma'][:][tid_sort]
 
@@ -653,27 +758,40 @@ def single(self, trigs):
         singles['benchmark_logvol'] = self.benchmark_logvol[tnum]
         return numpy.array(singles, ndmin=1)
 
-    def coinc_multiifo(self, s, slide,
-                       step, **kwargs): # pylint:disable=unused-argument
-        sngl_rates = {ifo: sngl_data['snglstat'] for ifo, sngl_data in
-                      s.items()}
+    def coinc_multiifo(self, s, slide, step, to_shift,
+                       **kwargs): # pylint:disable=unused-argument
+        sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s}
         ln_noise_rate = coinc_rate.combination_noise_lograte(
             sngl_rates, kwargs['time_addition'])
+        ln_noise_rate -= self.benchmark_lograte
+
         # Network sensitivity for a given coinc type is approximately
         # determined by the least sensitive ifo
-        network_sigmasq = numpy.amin([s[ifo]['sigmasq'] for ifo in s.keys()],
+        network_sigmasq = numpy.amin([sngl[1]['sigmasq'] for sngl in s],
                                      axis=0)
         # Volume \propto sigma^3 or sigmasq^1.5
         network_logvol = 1.5 * numpy.log(network_sigmasq)
-        # Get benchmark log volume as single-ifo information
-        # NB, benchmark logvol for a given template is not ifo-dependent
-        # so choose one ifo for convenience
-        ifos = s.keys()
-        benchmark_logvol = s[ifos[0]]['benchmark_logvol']
-
-        loglr = - ln_noise_rate + self.benchmark_lograte \
-            + network_logvol - benchmark_logvol
+        # NB benchmark logvol for a given template is not ifo-dependent
+        # - choose the first ifo for convenience
+        benchmark_logvol = s[0][1]['benchmark_logvol']
+        network_logvol -= benchmark_logvol
+
+        coincifos = [sngl[0] for sngl in s]
+        # logsignalrate function from PhaseTDStatistic
+        if ('H1' in coincifos and 'L1' in coincifos):
+            # apply HL hist for HL & HLV coincs, keep only H/L info
+            s_hl = [sngl[1] for sngl in s if sngl[0] in ['H1', 'L1']]
+            shift_hl = [sh for sngl, sh in zip(s, to_shift) if \
+                        sngl[0] in ['H1', 'L1']]
+            logr_s = self.logsignalrate_multiifo(s_hl, slide * step, shift_hl)
+        else:
+            logr_s = self.logsignalrate_multiifo([sngl[1] for sngl in s],
                                                  slide * step, to_shift)
+
+        loglr = logr_s + network_logvol - ln_noise_rate
+        # cut off underflowing and very small values
+        loglr[loglr < -30.] = -30.
+        return loglr
@@ -710,6 +828,7 @@ def coinc_multiifo(self, s, slide,
     'exp_fit_sg_csnr_psdvar': ExpFitSGPSDCombinedSNR
 }
 
+
 def get_statistic(stat):
     """
     Error-handling sugar around dict lookup for coincident statistics
@@ -734,6 +853,7 @@ def get_statistic(stat):
     except KeyError:
         raise RuntimeError('%s is not an available detection statistic' % stat)
 
+
 def get_sngl_statistic(stat):
     """
    Error-handling sugar around dict lookup for single-detector statistics
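
The central interface change in pycbc/events/stat.py above is that `coinc_multiifo` now receives a list of `(ifo, single-detector statistic)` tuples in ifo order, plus a `to_shift` vector of time-shift multiples, instead of a dict keyed on ifo name. A minimal usage sketch follows, assuming a PyCBC build that includes this patch and that `'newsnr'` is the registered key for `NewSNRStatistic`; the trigger values are invented for illustration:

```python
# Sketch of the new coinc_multiifo calling convention (illustrative only).
import numpy
from pycbc.events import stat

# NewSNRStatistic needs no statistic files; pass the ifo list as the
# updated pycbc_multiifo_coinc_findtrigs script does.
rank_method = stat.get_statistic('newsnr')(files=None, ifos=['H1', 'L1', 'V1'])

# Single-detector statistic values as (ifo, values) tuples in ifo order,
# replacing the old dict keyed on ifo name.
single_info = [('H1', numpy.array([8.0, 9.5])),
               ('L1', numpy.array([7.0, 8.5])),
               ('V1', numpy.array([5.0, 6.0]))]

# slide, step and to_shift are unused by this simple statistic; to_shift
# follows the script's pivot-ifo convention (-1 for the pivot, 0 elsewhere).
cstat = rank_method.coinc_multiifo(single_info, slide=numpy.zeros(2), step=0.,
                                   to_shift=[-1, 0, 0])
print(cstat)  # quadrature sum, e.g. sqrt(8.0**2 + 7.0**2 + 5.0**2) ~ 11.75
```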
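The refactored `PhaseTDStatistic.signal_hist` bins each coinc parameter with `numpy.searchsorted` and clamps out-of-range points to the edge bins, with a single loop over `(binnum, axis)` pairs applying the pattern to all five histogram axes. A standalone sketch of that pattern, with invented bin edges and sample values:

```python
# Sketch of the searchsorted-and-clamp bin lookup used in signal_hist
# (bin edges and sample values are invented for illustration).
import numpy

edges = numpy.array([-0.1, -0.05, 0.0, 0.05, 0.1])  # e.g. the 'dt' axis
values = numpy.array([-0.2, -0.07, 0.02, 0.3])      # sample time differences

binnum = numpy.searchsorted(edges, values) - 1
# push points lying outside the boundaries back to the nearest bin
binend = len(edges)
binnum[binnum < 0] = 0
binnum[binnum >= binend - 1] = binend - 2
print(binnum)  # -> [0 0 2 3]
```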