Reimplement PhaseTDStatistic (#3596)

* Reimplement PhaseTDStatistic
* Fix error in PyCBC Live due to args.statistic_files being modified by from_cli()
* Switch PyCBC Live test to use the PTA statistic again
* A couple fixes from Codeclimate
* Add reference for coinc stat
* Add safety check for coinc_lim_for_thresh()
gwastro · Feb 5, 2021 · c43fcbb · c43fcbb
1 parent 0a4dc9b
commit c43fcbb
Show file tree

Hide file tree

Showing 4 changed files with 62 additions and 36 deletions.
diff --git a/examples/live/.gitignore b/examples/live/.gitignore
@@ -1,4 +1,5 @@
 strain/
 template_bank.hdf
 injections.hdf
+stat*.hdf
 output/
diff --git a/examples/live/run.sh b/examples/live/run.sh
@@ -89,6 +89,18 @@ else
 fi
 
 
+# make phase-time-amplitude histogram files, if needed
+
+if [[ ! -f statHL.hdf ]]
+then
+    echo -e "\\n\\n>> [`date`] Making phase-time-amplitude files"
+
+    bash ../search/stats.sh
+else
+    echo -e "\\n\\n>> [`date`] Pre-existing phase-time-amplitude files found"
+fi
+
+
 # delete old outputs if they exist
 rm -rf ./output
 
@@ -151,8 +163,9 @@ python -m mpi4py `which pycbc_live` \
 --max-batch-size 16777216 \
 --output-path output \
 --day-hour-output-prefix \
---ranking-statistic quadsum \
 --sngl-ranking newsnr_sgveto \
+--ranking-statistic phasetd \
+--statistic-files statHL.hdf statHV.hdf statLV.hdf \
 --sgchisq-snr-threshold 4 \
 --sgchisq-locations "mtotal>40:20-30,20-45,20-60,20-75,20-90,20-105,20-120" \
 --enable-background-estimation \

diff --git a/pycbc/events/coinc.py b/pycbc/events/coinc.py
@@ -850,21 +850,20 @@ def pick_best_coinc(cls, coinc_results):
     @classmethod
     def from_cli(cls, args, num_templates, analysis_chunk, ifos):
         from . import stat
+
         # Allow None inputs
-        if args.statistic_files is None:
-            args.statistic_files = []
-        if args.statistic_keywords is None:
-            args.statistic_keywords = []
+        stat_files = args.statistic_files or []
+        stat_keywords = args.statistic_keywords or []
 
         # flatten the list of lists of filenames to a single list (may be empty)
-        args.statistic_files = sum(args.statistic_files, [])
+        stat_files = sum(stat_files, [])
 
-        kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)
+        kwargs = stat.parse_statistic_keywords_opt(stat_keywords)
 
         return cls(num_templates, analysis_chunk,
                    args.ranking_statistic,
                    args.sngl_ranking,
-                   args.statistic_files,
+                   stat_files,
                    return_background=args.store_background,
                    ifar_limit=args.background_ifar_limit,
                    timeslide_interval=args.timeslide_interval,

diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py
@@ -153,7 +153,7 @@ def _check_coinc_lim_subclass(self, allowed_names):
 
         coinc_lim_for_thresh is only defined for the statistic it is present
         in. If we subclass, we must check explicitly that it is still valid and
-        inidicate this in the code. If the code does not have this explicit
+        indicate this in the code. If the code does not have this explicit
         check you will see the failure message here.
 
         Parameters
@@ -335,7 +335,6 @@ def get_hist(self, ifos=None):
         """
         Read in a signal density file for the ifo combination
 
-
         Parameters
         ----------
         ifos: list
@@ -355,12 +354,11 @@ def get_hist(self, ifos=None):
                 if num != len(ifos):
                     continue
 
-                match = [ifo in name for ifo in ifos]
+                match = [ifo in ifokey for ifo in ifos]
                 if False in match:
                     continue
-                else:
-                    selected = name
-                    break
+                selected = name
+                break
 
         if selected is None:
             raise RuntimeError("Couldn't figure out which stat file to use")
@@ -467,12 +465,12 @@ def logsignalrate(self, stats, shift, to_shift):
 
         Parameters
         ----------
-        stats: list of dicts giving single-ifo quantities, ordered as
-            self.ifos
-        shift: numpy array of float, size of the time shift vector for each
-            coinc to be ranked
-        to_shift: list of int, multiple of the time shift to apply ordered
-            as self.ifos
+        stats: dict of dicts
+            Single-detector quantities for each detector
+        shift: numpy array of float
+            Time shift vector for each coinc to be ranked
+        to_shift: list of ints
+            Multiple of the time shift to apply, ordered as self.ifos
 
         Returns
         -------
@@ -616,26 +614,41 @@ def rank_stat_single(self, single_info):
         numpy.ndarray
             The array of single detector statistics
         """
-        err_msg = "Sorry! No-one has implemented this method yet! "
-        raise NotImplementedError(err_msg)
+        return self.single(single_info[1])
 
-    def rank_stat_coinc(self, s, slide, step, to_shift,
-                        **kwargs): # pylint:disable=unused-argument
+    def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
+                        **kwargs):  # pylint:disable=unused-argument
         """
-        Calculate the coincident detection statistic.
+        Calculate the coincident detection statistic, defined in Eq 2 of
+        [Nitz et al, 2017](https://doi.org/10.3847/1538-4357/aa8f50).
         """
-        err_msg = "Sorry! No-one has implemented this method yet! "
-        raise NotImplementedError(err_msg)
+        rstat = sum(s[1]['snglstat'] ** 2 for s in sngls_list)
+        cstat = rstat + 2. * self.logsignalrate(dict(sngls_list),
+                                                slide * step,
+                                                to_shift)
+        cstat[cstat < 0] = 0
+        return cstat ** 0.5
 
-    def coinc_lim_for_thresh(self, s, thresh, limifo,
-                             **kwargs): # pylint:disable=unused-argument
+    def coinc_lim_for_thresh(self, sngls_list, thresh, limifo,
+                             **kwargs):  # pylint:disable=unused-argument
         """
-        Optimization function to identify coincs too quiet to be of interest
-        Calculate the required single detector statistic to exceed
-        the threshold for each of the input triggers.
+        Optimization function to identify coincs too quiet to be of interest.
+        Calculate the required single detector statistic to exceed the
+        threshold for each of the input triggers.
         """
-        err_msg = "Sorry! No-one has implemented this method yet! "
-        raise NotImplementedError(err_msg)
+        # Safety against subclassing and not rethinking this
+        allowed_names = ['PhaseTDStatistic']
+        self._check_coinc_lim_subclass(allowed_names)
+
+        if not self.has_hist:
+            self.get_hist()
+
+        lim_stat = [b['snglstat'] for a, b in sngls_list if a == limifo][0]
+        s1 = thresh ** 2. - lim_stat ** 2.
+        # Assume best case scenario and use maximum signal rate
+        s1 -= 2. * self.hist_max
+        s1[s1 < 0] = 0
+        return s1 ** 0.5
 
 
 class ExpFitStatistic(QuadratureSumStatistic):
@@ -665,8 +678,8 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
             The list of detector names
         """
 
-        if not len(files):
-            raise RuntimeError("Can't find any statistic files !")
+        if not files:
+            raise RuntimeError("Statistic files not specified")
         QuadratureSumStatistic.__init__(self, sngl_ranking, files=files,
                                         ifos=ifos, **kwargs)