Skip to content

Commit

Permalink
Reimplement PhaseTDStatistic (#3596)
Browse files Browse the repository at this point in the history
* Reimplement PhaseTDStatistic

* Fix error in PyCBC Live caused by from_cli() modifying args.statistic_files in place

* Switch PyCBC Live test to use the PTA statistic again

* A couple of fixes from Code Climate

* Add reference for the coincident statistic

* Add safety check for coinc_lim_for_thresh()
  • Loading branch information
titodalcanton authored Feb 5, 2021
1 parent 0a4dc9b commit c43fcbb
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 36 deletions.
1 change: 1 addition & 0 deletions examples/live/.gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
strain/
template_bank.hdf
injections.hdf
stat*.hdf
output/
15 changes: 14 additions & 1 deletion examples/live/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,18 @@ else
fi


# make phase-time-amplitude histogram files, if needed

if [[ ! -f statHL.hdf ]]
then
echo -e "\\n\\n>> [`date`] Making phase-time-amplitude files"

bash ../search/stats.sh
else
echo -e "\\n\\n>> [`date`] Pre-existing phase-time-amplitude files found"
fi


# delete old outputs if they exist
rm -rf ./output

Expand Down Expand Up @@ -151,8 +163,9 @@ python -m mpi4py `which pycbc_live` \
--max-batch-size 16777216 \
--output-path output \
--day-hour-output-prefix \
--ranking-statistic quadsum \
--sngl-ranking newsnr_sgveto \
--ranking-statistic phasetd \
--statistic-files statHL.hdf statHV.hdf statLV.hdf \
--sgchisq-snr-threshold 4 \
--sgchisq-locations "mtotal>40:20-30,20-45,20-60,20-75,20-90,20-105,20-120" \
--enable-background-estimation \
Expand Down
13 changes: 6 additions & 7 deletions pycbc/events/coinc.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,21 +850,20 @@ def pick_best_coinc(cls, coinc_results):
@classmethod
def from_cli(cls, args, num_templates, analysis_chunk, ifos):
from . import stat

# Allow None inputs
if args.statistic_files is None:
args.statistic_files = []
if args.statistic_keywords is None:
args.statistic_keywords = []
stat_files = args.statistic_files or []
stat_keywords = args.statistic_keywords or []

# flatten the list of lists of filenames to a single list (may be empty)
args.statistic_files = sum(args.statistic_files, [])
stat_files = sum(stat_files, [])

kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)
kwargs = stat.parse_statistic_keywords_opt(stat_keywords)

return cls(num_templates, analysis_chunk,
args.ranking_statistic,
args.sngl_ranking,
args.statistic_files,
stat_files,
return_background=args.store_background,
ifar_limit=args.background_ifar_limit,
timeslide_interval=args.timeslide_interval,
Expand Down
69 changes: 41 additions & 28 deletions pycbc/events/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _check_coinc_lim_subclass(self, allowed_names):
coinc_lim_for_thresh is only defined for the statistic it is present
in. If we subclass, we must check explicitly that it is still valid and
inidicate this in the code. If the code does not have this explicit
indicate this in the code. If the code does not have this explicit
check you will see the failure message here.
Parameters
Expand Down Expand Up @@ -335,7 +335,6 @@ def get_hist(self, ifos=None):
"""
Read in a signal density file for the ifo combination
Parameters
----------
ifos: list
Expand All @@ -355,12 +354,11 @@ def get_hist(self, ifos=None):
if num != len(ifos):
continue

match = [ifo in name for ifo in ifos]
match = [ifo in ifokey for ifo in ifos]
if False in match:
continue
else:
selected = name
break
selected = name
break

if selected is None:
raise RuntimeError("Couldn't figure out which stat file to use")
Expand Down Expand Up @@ -467,12 +465,12 @@ def logsignalrate(self, stats, shift, to_shift):
Parameters
----------
stats: list of dicts giving single-ifo quantities, ordered as
self.ifos
shift: numpy array of float, size of the time shift vector for each
coinc to be ranked
to_shift: list of int, multiple of the time shift to apply ordered
as self.ifos
stats: dict of dicts
Single-detector quantities for each detector
shift: numpy array of float
Time shift vector for each coinc to be ranked
to_shift: list of ints
Multiple of the time shift to apply, ordered as self.ifos
Returns
-------
Expand Down Expand Up @@ -616,26 +614,41 @@ def rank_stat_single(self, single_info):
numpy.ndarray
The array of single detector statistics
"""
err_msg = "Sorry! No-one has implemented this method yet! "
raise NotImplementedError(err_msg)
return self.single(single_info[1])

def rank_stat_coinc(self, s, slide, step, to_shift,
**kwargs): # pylint:disable=unused-argument
def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
**kwargs): # pylint:disable=unused-argument
"""
Calculate the coincident detection statistic.
Calculate the coincident detection statistic, defined in Eq 2 of
[Nitz et al, 2017](https://doi.org/10.3847/1538-4357/aa8f50).
"""
err_msg = "Sorry! No-one has implemented this method yet! "
raise NotImplementedError(err_msg)
rstat = sum(s[1]['snglstat'] ** 2 for s in sngls_list)
cstat = rstat + 2. * self.logsignalrate(dict(sngls_list),
slide * step,
to_shift)
cstat[cstat < 0] = 0
return cstat ** 0.5

def coinc_lim_for_thresh(self, s, thresh, limifo,
**kwargs): # pylint:disable=unused-argument
def coinc_lim_for_thresh(self, sngls_list, thresh, limifo,
**kwargs): # pylint:disable=unused-argument
"""
Optimization function to identify coincs too quiet to be of interest
Calculate the required single detector statistic to exceed
the threshold for each of the input triggers.
Optimization function to identify coincs too quiet to be of interest.
Calculate the required single detector statistic to exceed the
threshold for each of the input triggers.
"""
err_msg = "Sorry! No-one has implemented this method yet! "
raise NotImplementedError(err_msg)
# Safety against subclassing and not rethinking this
allowed_names = ['PhaseTDStatistic']
self._check_coinc_lim_subclass(allowed_names)

if not self.has_hist:
self.get_hist()

lim_stat = [b['snglstat'] for a, b in sngls_list if a == limifo][0]
s1 = thresh ** 2. - lim_stat ** 2.
# Assume best case scenario and use maximum signal rate
s1 -= 2. * self.hist_max
s1[s1 < 0] = 0
return s1 ** 0.5


class ExpFitStatistic(QuadratureSumStatistic):
Expand Down Expand Up @@ -665,8 +678,8 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
The list of detector names
"""

if not len(files):
raise RuntimeError("Can't find any statistic files !")
if not files:
raise RuntimeError("Statistic files not specified")
QuadratureSumStatistic.__init__(self, sngl_ranking, files=files,
ifos=ifos, **kwargs)

Expand Down

0 comments on commit c43fcbb

Please sign in to comment.