Cythonize logsignalrate (gwastro#4114)

* DOc additions and Tom-suggested changes * Stat restructure for profiling DO NOT MERGE * More test changes * Finish cython-izing logsignalrate * Remove test function * Remove unused variable * Update pycbc_coinc_findtrigs * Update eventmgr_cython.pyx * minor rewrite for less chatty comment * FIx modulus issue * Resolve rebase issue * Update test * Update pycbc/events/stat.py Co-authored-by: Tito Dal Canton <tito@dalcanton.it> Co-authored-by: Thomas Dent <thomas.dent@usc.es> Co-authored-by: Tito Dal Canton <tito@dalcanton.it>
acorreia61201 · Apr 4, 2024 · 5ac80de · 5ac80de
1 parent 73fcafe
commit 5ac80de
Show file tree

Hide file tree

Showing 4 changed files with 200 additions and 67 deletions.
diff --git a/bin/all_sky_search/pycbc_coinc_findtrigs b/bin/all_sky_search/pycbc_coinc_findtrigs
@@ -173,7 +173,7 @@ if args.timeslide_interval is not None and args.timeslide_interval <= TWOEARTH:
 
 # slide = 0 means don't do timeslides
 if args.timeslide_interval is None:
-    args.timeslide_interval = 0
+    args.timeslide_interval = 0.0
 
 if args.randomize_template_order:
     seed(0)

diff --git a/pycbc/events/eventmgr_cython.pyx b/pycbc/events/eventmgr_cython.pyx
@@ -1,6 +1,7 @@
-import numpy
-cimport numpy
+import numpy as np
+cimport numpy as cnp
 from cython import wraparound, boundscheck, cdivision
+from libc.math cimport M_PI, sqrt
 
 
 ctypedef fused REALTYPE:
@@ -12,9 +13,9 @@ ctypedef fused REALTYPE:
 @wraparound(False)
 @cdivision(True)
 def findchirp_cluster_over_window_cython\
-        (numpy.ndarray[numpy.int32_t, ndim=1] times,
-         numpy.ndarray[REALTYPE, ndim=1] absvalues, int window_length,
-         numpy.ndarray[numpy.int32_t, ndim=1] indices, int tlen):
+        (cnp.ndarray[cnp.int32_t, ndim=1] times,
+         cnp.ndarray[REALTYPE, ndim=1] absvalues, int window_length,
+         cnp.ndarray[cnp.int32_t, ndim=1] indices, int tlen):
     cdef int j = 0
     cdef int curr_ind = 0
     cdef int i
@@ -28,3 +29,91 @@ def findchirp_cluster_over_window_cython\
             indices[j] = i
             curr_ind = i
     return j
+
+
+@boundscheck(False)
+@wraparound(False)
+@cdivision(True)
+def logsignalrateinternals_computepsignalbins(
+    double[:] pdif,
+    double[:] tdif,
+    double[:] sdif,
+    int[:] pbin,
+    int[:] tbin,
+    int[:] sbin,
+    float[:] p,
+    double[:] t,
+    float[:] s,
+    float[:] sig,
+    float[:] pref,
+    double[:] tref,
+    float[:] sref,
+    float[:] sigref,
+    double[:] shift,
+    long int[:] rtype,
+    double sense,
+    double senseref,
+    double twidth,
+    double pwidth,
+    double swidth,
+    int to_shift_ref,
+    int to_shift_ifo,
+    int length
+):
+    cdef:
+        int idx, ridx
+
+    for idx in range(length):
+        ridx = rtype[idx]
+        pdif[idx] = (pref[ridx] - p[ridx]) % (M_PI * 2)
+        if pdif[idx] < 0:
+            # C modulus operator is not same as python's, correct for this
+            pdif[idx] += (M_PI * 2)
+        tdif[idx] = shift[ridx] * to_shift_ref + tref[ridx] - shift[ridx] * to_shift_ifo - t[ridx]
+        sdif[idx] = (s[ridx] * sense * sqrt(sigref[ridx])) / (sref[ridx] * senseref * sqrt(sig[ridx]))
+
+    for idx in range(length):
+        tbin[idx] = <int>(tdif[idx] / twidth)
+        pbin[idx] = <int>(pdif[idx] / pwidth)
+        sbin[idx] = <int>(sdif[idx] / swidth)
+
+@boundscheck(False)
+@wraparound(False)
+@cdivision(True)
+def logsignalrateinternals_compute2detrate(
+    int[:] nbinned0,
+    int[:] nbinned1,
+    int[:] nbinned2,
+    long int c0_size,
+    long int c1_size,
+    long int c2_size,
+    float[:] rate,
+    long int[:] rtype,
+    float[:] sref,
+    float[:,:,::1] two_det_weights, # This declares a C-contiguous array
+    float max_penalty,
+    float ref_snr,
+    int length
+):
+    cdef:
+        int idx, ridx, id0, id1, id2
+        float rescale_fac
+
+    for idx in range(length):
+        ridx = rtype[idx]
+        id0 = nbinned0[idx] + c0_size / 2
+        id1 = nbinned1[idx] + c1_size / 2
+        id2 = nbinned2[idx] + c2_size / 2
+
+        # For bins that exist in the signal pdf histogram, apply that pdf value,
+        # otherwise apply the "max penalty" value.
+        # The bins are specified by 3 indexes (corresponding to the time
+        # difference, phase difference and relative sensitivity dimensions).
+        if (id0 > 0) and (id0 < c0_size) and (id1 > 0) and (id1 < c1_size) and (id2 > 0) and (id2 < c2_size):
+            rate[ridx] = two_det_weights[id0, id1, id2]
+        else:
+            rate[ridx] = max_penalty
+        # Scale by signal population SNR
+        rescale_fac = ref_snr / sref[ridx]
+        rate[ridx] *= (rescale_fac*rescale_fac*rescale_fac*rescale_fac)
+
diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py
@@ -29,6 +29,8 @@
 import numpy
 from . import ranking
 from . import coinc_rate
+from .eventmgr_cython import logsignalrateinternals_computepsignalbins
+from .eventmgr_cython import logsignalrateinternals_compute2detrate
 
 
 class Stat(object):
@@ -335,6 +337,14 @@ def __init__(self, sngl_ranking, files=None, ifos=None,
         self.param_bin = {}
         self.two_det_flag = (len(ifos) == 2)
         self.two_det_weights = {}
+        # Some memory
+        self.pdif = numpy.zeros(128, dtype=numpy.float64)
+        self.tdif = numpy.zeros(128, dtype=numpy.float64)
+        self.sdif = numpy.zeros(128, dtype=numpy.float64)
+        self.tbin = numpy.zeros(128, dtype=numpy.int32)
+        self.pbin = numpy.zeros(128, dtype=numpy.int32)
+        self.sbin = numpy.zeros(128, dtype=numpy.int32)
+
         if pregenerate_hist and not len(ifos) == 1:
             self.get_hist()
 
@@ -446,12 +456,15 @@ def get_hist(self, ifos=None):
                     self.c1_size = {}
                     self.c2_size = {}
 
-                self.c0_size[ifo] = 2 * (abs(self.param_bin[ifo]['c0']).max()
-                                         + 1)
-                self.c1_size[ifo] = 2 * (abs(self.param_bin[ifo]['c1']).max()
-                                         + 1)
-                self.c2_size[ifo] = 2 * (abs(self.param_bin[ifo]['c2']).max()
-                                         + 1)
+                self.c0_size[ifo] = numpy.int32(
+                    2 * (abs(self.param_bin[ifo]['c0']).max() + 1)
+                )
+                self.c1_size[ifo] = numpy.int32(
+                    2 * (abs(self.param_bin[ifo]['c1']).max() + 1)
+                )
+                self.c2_size[ifo] = numpy.int32(
+                    2 * (abs(self.param_bin[ifo]['c2']).max() + 1)
+                )
 
                 array_size = [self.c0_size[ifo], self.c1_size[ifo],
                               self.c2_size[ifo]]
@@ -508,67 +521,96 @@ def logsignalrate(self, stats, shift, to_shift):
 
         # Get reference ifo information
         rate = numpy.zeros(len(shift), dtype=numpy.float32)
+        ps = {ifo: numpy.array(stats[ifo]['coa_phase'], ndmin=1)
+              for ifo in self.ifos}
+        ts = {ifo: numpy.array(stats[ifo]['end_time'], ndmin=1)
+              for ifo in self.ifos}
+        ss = {ifo: numpy.array(stats[ifo]['snr'], ndmin=1)
+              for ifo in self.ifos}
+        sigs = {ifo: numpy.array(stats[ifo]['sigmasq'], ndmin=1)
+                for ifo in self.ifos}
         for ref_ifo in self.ifos:
             rtype = rtypes[ref_ifo]
-            ref = stats[ref_ifo]
-            pref = numpy.array(ref['coa_phase'], ndmin=1)[rtype]
-            tref = numpy.array(ref['end_time'], ndmin=1)[rtype]
-            sref = numpy.array(ref['snr'], ndmin=1)[rtype]
-            sigref = numpy.array(ref['sigmasq'], ndmin=1) ** 0.5
-            sigref = sigref[rtype]
+            pref = ps[ref_ifo]
+            tref = ts[ref_ifo]
+            sref = ss[ref_ifo]
+            sigref = sigs[ref_ifo]
             senseref = self.relsense[self.hist_ifos[0]]
 
             binned = []
             other_ifos = [ifo for ifo in self.ifos if ifo != ref_ifo]
             for ifo in other_ifos:
-                sc = stats[ifo]
-                p = numpy.array(sc['coa_phase'], ndmin=1)[rtype]
-                t = numpy.array(sc['end_time'], ndmin=1)[rtype]
-                s = numpy.array(sc['snr'], ndmin=1)[rtype]
-
-                sense = self.relsense[ifo]
-                sig = numpy.array(sc['sigmasq'], ndmin=1) ** 0.5
-                sig = sig[rtype]
+                # Assign cached memory
+                length = len(rtype)
+                while length > len(self.pdif):
+                    newlen = len(self.pdif) * 2
+                    self.pdif = numpy.zeros(newlen, dtype=numpy.float64)
+                    self.tdif = numpy.zeros(newlen, dtype=numpy.float64)
+                    self.sdif = numpy.zeros(newlen, dtype=numpy.float64)
+                    self.pbin = numpy.zeros(newlen, dtype=numpy.int32)
+                    self.tbin = numpy.zeros(newlen, dtype=numpy.int32)
+                    self.sbin = numpy.zeros(newlen, dtype=numpy.int32)
 
                 # Calculate differences
-                pdif = (pref - p) % (numpy.pi * 2.0)
-                tdif = shift[rtype] * to_shift[ref_ifo] + \
-                    tref - shift[rtype] * to_shift[ifo] - t
-                sdif = s / sref * sense / senseref * sigref / sig
-
-                # Put into bins
-                tbin = (tdif / self.twidth).astype(int)
-                pbin = (pdif / self.pwidth).astype(int)
-                sbin = (sdif / self.swidth).astype(int)
-                binned += [tbin, pbin, sbin]
-
-            # Convert binned to same dtype as stored in hist
-            nbinned = numpy.zeros(len(pbin), dtype=self.pdtype)
-            for i, b in enumerate(binned):
-                nbinned['c%s' % i] = b
+                logsignalrateinternals_computepsignalbins(
+                    self.pdif,
+                    self.tdif,
+                    self.sdif,
+                    self.pbin,
+                    self.tbin,
+                    self.sbin,
+                    ps[ifo],
+                    ts[ifo],
+                    ss[ifo],
+                    sigs[ifo],
+                    pref,
+                    tref,
+                    sref,
+                    sigref,
+                    shift,
+                    rtype,
+                    self.relsense[ifo],
+                    senseref,
+                    self.twidth,
+                    self.pwidth,
+                    self.swidth,
+                    to_shift[ref_ifo],
+                    to_shift[ifo],
+                    length
+                )
+
+                binned += [
+                    self.tbin[:length],
+                    self.pbin[:length],
+                    self.sbin[:length]
+                ]
 
             # Read signal weight from precalculated histogram
             if self.two_det_flag:
                 # High-RAM, low-CPU option for two-det
-                rate[rtype] = numpy.zeros(len(nbinned)) + self.max_penalty
-
-                id0 = nbinned['c0'].astype(numpy.int32) \
-                    + self.c0_size[ref_ifo] // 2
-                id1 = nbinned['c1'].astype(numpy.int32) \
-                    + self.c1_size[ref_ifo] // 2
-                id2 = nbinned['c2'].astype(numpy.int32) \
-                    + self.c2_size[ref_ifo] // 2
-
-                # look up keys which are within boundaries
-                within = (id0 > 0) & (id0 < self.c0_size[ref_ifo])
-                within = within & (id1 > 0) & (id1 < self.c1_size[ref_ifo])
-                within = within & (id2 > 0) & (id2 < self.c2_size[ref_ifo])
-                within = numpy.where(within)[0]
-                rate[rtype[within]] = \
-                    self.two_det_weights[ref_ifo][id0[within], id1[within],
-                                                  id2[within]]
+                logsignalrateinternals_compute2detrate(
+                    binned[0],
+                    binned[1],
+                    binned[2],
+                    self.c0_size[ref_ifo],
+                    self.c1_size[ref_ifo],
+                    self.c2_size[ref_ifo],
+                    rate,
+                    rtype,
+                    sref,
+                    self.two_det_weights[ref_ifo],
+                    self.max_penalty,
+                    self.ref_snr,
+                    len(rtype)
+                )
             else:
                 # Low[er]-RAM, high[er]-CPU option for >two det
+
+                # Convert binned to same dtype as stored in hist
+                nbinned = numpy.zeros(len(binned[1]), dtype=self.pdtype)
+                for i, b in enumerate(binned):
+                    nbinned[f'c{i}'] = b
+
                 loc = numpy.searchsorted(self.param_bin[ref_ifo], nbinned)
                 loc[loc == len(self.weights[ref_ifo])] = 0
                 rate[rtype] = self.weights[ref_ifo][loc]
@@ -579,9 +621,8 @@ def logsignalrate(self, stats, shift, to_shift):
                     self.param_bin[ref_ifo][loc] != nbinned
                 )[0]
                 rate[rtype[missed]] = self.max_penalty
-
-            # Scale by signal population SNR
-            rate[rtype] *= (sref / self.ref_snr) ** -4.0
+                # Scale by signal population SNR
+                rate[rtype] *= (sref[rtype] / self.ref_snr) ** -4.0
 
         return numpy.log(rate)
 

diff --git a/test/test_live_coinc_compare.py b/test/test_live_coinc_compare.py
@@ -29,13 +29,16 @@ def __init__(self, num_templates, analysis_chunk, detectors, num_trigs_per_block
     def get_trigs(self):
         trigs = {}
         for det in self.detectors:
+            rand_end = np.random.randint(
+                self.start_time*4096,
+                (self.start_time + self.analysis_chunk)*4096,
+                size=self.num_trigs
+            )
+            rand_end = (rand_end / 4096.).astype(np.float64)
+
             trigs[det] = {
                 "snr": np.random.uniform(4.5, 10, size=self.num_trigs).astype(np.float32),
-                "end_time": np.random.uniform(
-                    self.start_time,
-                    self.start_time + self.analysis_chunk,
-                    size=self.num_trigs
-                ).astype(np.float64),
+                "end_time": rand_end,
                 "chisq": np.random.uniform(0.5, 1.5, size=self.num_trigs).astype(np.float32),
                 "chisq_dof": np.ones(self.num_trigs, dtype=np.int32) * 10,
                 "coa_phase": np.random.uniform(0, 2*np.pi, size=self.num_trigs).astype(np.float32),
@@ -117,7 +120,7 @@ def test_coincer_runs(self):
         new_coincer = self.new_coincer
         old_coincer = self.old_coincer
         self.assertTrue(len(new_coincer.coincs.data) == len(old_coincer.coincs.data))
-        self.assertTrue((new_coincer.coincs.data == old_coincer.coincs.data).all())
+        self.assertTrue(numpy.isclose(new_coincer.coincs.data, old_coincer.coincs.data, rtol=1e-06).all())
 
         for ifo in new_coincer.singles:
             lgc = True