Dev comments for the pycbc_multi_inspiral executable (gwastro#4513)
* Dev comments for the pycbc_multi_inspiral executable

* minor changes to dev comments pycbc_multi_inspiral

* minor change in dev comments pycbc_multi_inspiral

* Update dev comments pycbc_multi_inspiral

* Update pycbc_multi_inspiral

* Update pycbc_multi_inspiral

* Update dev comments for pycbc_multi_inspiral

* Update on dev comments for pycbc_multi_inspiral

Simplified the comments in the argparse section of the code and moved some of the important details to gwastro#4513 (comment).

This has been done in order to keep track of them and migrate them to issues and PRs in the near future.

* Update to dev comments pycbc_multi_inspiral

* Update dev comments pycbc_multi_inspiral

* Update dev comments pycbc_multi_inspiral

* Update dev comments pycbc_multi_inspiral

* Update dev comments pycbc_multi_inspiral
sebastiangomezlopez authored and acorreia61201 committed Apr 4, 2024
1 parent 9645f34 commit d248865
Showing 1 changed file with 84 additions and 31 deletions.
115 changes: 84 additions & 31 deletions bin/pycbc_multi_inspiral
@@ -19,6 +19,10 @@
"""
Find multi-detector gravitational wave triggers and calculate the
coherent SNRs and related statistics.
To see an example of how to analyze the GW170817 event using this executable,
take a look at:
https://github.com/gwastro/pycbc/blob/master/examples/multi_inspiral/run.sh
"""

import logging
@@ -36,10 +40,14 @@ from pycbc.filter import MatchedFilterControl
from pycbc.types import TimeSeries, zeros, float32, complex64
from pycbc.types import MultiDetOptionAction
from pycbc.vetoes import sgchisq


# The following block of lines sets up the command-line interface (CLI) for the
# pycbc_multi_inspiral executable.
time_init = time.time()
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("-V", "--verbose", action="store_true",
help="print extra debugging information",
help="prints extra debugging information during runtime",
default=False)
parser.add_argument("--output", type=str)
parser.add_argument("--instruments", nargs="+", type=str, required=True,
@@ -74,7 +82,7 @@ parser.add_argument("--bank-veto-bank-file", type=str, help="Path to the "
"bank file used to compute the the bank chi-square veto.")
parser.add_argument("--chisq-bins", default=0)
# Commenting out options which are not yet implemented
# parser.add_argument("--chisq-threshold", type=float, default=0)
# parser.add_argument("--chisq-threshold", type=float, default=0)
# parser.add_argument("--chisq-delta", type=float, default=0)
parser.add_argument("--autochi-number-points", type=int, default=0)
parser.add_argument("--autochi-stride", type=int, default=0)
@@ -120,7 +128,7 @@ parser.add_argument("--trigger-time", type=int,
help="Time of the GRB, used to set the antenna patterns.")
parser.add_argument("--projection", default="standard",
choices=["standard", "left", "right", "left+right"],
help="Choice of projection matrix. 'left' and 'right' "
help="Choice of projection matrix. 'left' and 'right' "
"correspond to face-away and face-on")
parser.add_argument("--num-slides", type=int, default=0,
help="Number of time slides to perform.")
@@ -139,19 +147,38 @@ args = parser.parse_args()
init_logging(args.verbose)
# Set GRB time variable for convenience
t_gps = args.trigger_time
# The following line arranges the ifos in alphabetical order so they are
# always called in the same order.
args.instruments.sort()
nifo = len(args.instruments[:])
# The following verification methods check whether the input CLI options
# passed by the parser to the pycbc.strain, pycbc.strain.StrainSegments,
# pycbc.psd, pycbc.scheme & pycbc.fft modules are sane.
# The pycbc.strain CLI verifier:
strain.verify_strain_options_multi_ifo(
args, parser, args.instruments)
strain.StrainSegments.verify_segment_options_multi_ifo(
args, parser, args.instruments)
# The pycbc.psd CLI verifier:
psd.verify_psd_options_multi_ifo(
args, parser, args.instruments)
# The pycbc.scheme CLI verifier:
scheme.verify_processing_options(args, parser)
# The pycbc.fft CLI verifier:
fft.verify_fft_options(args, parser)
# The following stores a dictionary of "InjFilterRejector" objects,
# which are used to avoid spending computing power on injections
# corresponding to templates with significantly different chirp mass.
inj_filter_rejector = inject.InjFilterRejector.from_cli_multi_ifos(
args, args.instruments)
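# (Background note: the chirp mass M_c = (m1*m2)**(3/5) / (m1+m2)**(1/5)
# sets the leading-order inspiral phasing, so an injection far from a
# template in M_c cannot produce a significant match with it.)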
# The following stores a dictionary of "timeseries" objects. Each of them
# corresponds to the strain each detectors.
# strain data is taken from args.gps_start_time to args.gps_end_time
# using the sampling rate args.sample_rate
strain_dict = strain.from_cli_multi_ifos(
args, args.instruments, inj_filter_rejector, dyn_range_fac=DYN_RANGE_FAC)
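# Illustrative check (a sketch, not part of the pipeline): every entry of
# strain_dict is a pycbc TimeSeries at the requested sampling rate, e.g.
#   for ifo, ts in strain_dict.items():
#       logging.info("%s strain: %d samples at %d Hz",
#                    ifo, len(ts), ts.sample_rate)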
# The following stores a dictionary of python slice objects that indicate
# where the segments begin and end, for each detector's timeseries.
strain_segments_dict = strain.StrainSegments.from_cli_multi_ifos(
args, strain_dict, args.instruments)
ctx = scheme.from_cli(args)
@@ -160,6 +187,9 @@ with ctx:
# Set some often used variables for easy access
flow = args.low_frequency_cutoff
flow_dict = defaultdict(lambda : flow)
# The following for loop checks that the sampling rate, flen and tlen
# agree for all detectors, taking the zeroth detector in the list as a
# reference.
for count, ifo in enumerate(args.instruments):
if count == 0:
sample_rate = strain_dict[ifo].sample_rate
@@ -180,12 +210,17 @@ with ctx:
err_msg = "Sample rate, frequency length and time length "
err_msg += "must all be consistent across ifos."
raise ValueError(err_msg)
# segments is a dictionary of frequency-domain objects, each of which is
# the Fourier transform of the corresponding segments in strain_segments_dict
logging.info("Making frequency-domain data segments")
segments = {
ifo: strain_segments_dict[ifo].fourier_segments()
for ifo in args.instruments
}
# The time-domain segments dictionary is deleted to save memory.
del strain_segments_dict
# The following pycbc.psd method associates PSDs to segments for all ifos
# when using the multi-detector CLI.
psd.associate_psds_to_multi_ifo_segments(
args, segments, strain_dict, flen, delta_f, flow, args.instruments,
dyn_range_factor=DYN_RANGE_FAC, precision='single')
@@ -194,28 +229,32 @@ with ctx:
# change if needed. Segments is only used to get tlen etc., which is the
# same for all ifos, so just send the first ifo
template_mem = zeros(tlen, dtype=complex64)

# Read the sky grid or the single sky position
if args.sky_grid is not None and args.ra is not None and args.dec is not None:
parser.error('Give either a sky grid or a sky position, not both')

if args.sky_grid is not None:
sky_grid = h5py.File(args.sky_grid, 'r')
ra = np.array(sky_grid['ra'])
dec = np.array(sky_grid['dec'])
if args.ra is not None and args.dec is not None:
ra = np.array([args.ra])
dec = np.array([args.dec])

sky_positions = np.array([ra, dec])
num_sky_positions = sky_positions.shape[1]
positions_array = np.arange(num_sky_positions)
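# For a single-position search (args.ra, args.dec), sky_positions has shape
# (2, 1) and positions_array is simply np.array([0]).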

# Calculate time delays to each detector and store time slide shifts
# in a dedicated dictionary
slide_ids = np.arange(1 + args.num_slides)
time_slides = {
ifo: args.slide_shift * slide_ids * n_ifo
for n_ifo, ifo in enumerate(args.instruments)}
# Given the time delays in time_slides, the following dictionary keeps the
# time delay indices evaluated wrt the geocenter, in units of samples, i.e.
# (time delay from geocenter + timeslide)*sampling_rate
time_delay_idx = {
slide: {
position_index: {
@@ -229,10 +268,10 @@
} for position_index in positions_array
} for slide in slide_ids
}
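# Worked sketch of the quantity stored above for one (slide, position_index,
# ifo) combination; Detector.time_delay_from_earth_center is the real
# pycbc.detector API, the variable names here are illustrative:
#   from pycbc.detector import Detector
#   dt = Detector(ifo).time_delay_from_earth_center(
#       sky_positions[0][position_index], sky_positions[1][position_index],
#       t_gps)
#   index = int(round((dt + time_slides[ifo][slide]) * sample_rate))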

# Matched filter each ifo. Don't cluster here for a coherent search.
# Clustering happens at the end of the template loop.
# NOTE: The single detector SNR threshold should not necessarily be
# applied to every IFO (usually only 2 most sensitive in
# network)
matched_filter = {
@@ -243,6 +282,9 @@ with ctx:
upsample_threshold=args.upsample_threshold,
upsample_method=args.upsample_method, cluster_function='symmetric')
for ifo in args.instruments}
# Vetoes
# The existing SingleDetPowerChisq can calculate the single detector
# chisq for multiple ifos, so just use that directly.
logging.info("Initializing signal-based vetoes.")
@@ -256,10 +298,13 @@ with ctx:
autochisq = vetoes.SingleDetAutoChisq(
args.autochi_stride, args.autochi_number_points,
onesided=args.autochi_onesided)
# Here we take all frequency-domain segments and over-whiten them by
# dividing by the PSD estimate.
logging.info("Overwhitening frequency-domain data segments")
for ifo in args.instruments:
for seg in segments[ifo]:
seg /= seg.psd
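# Over-whitening in brief (background note): each frequency-domain segment
# s(f) is divided by the PSD estimate S_n(f) that was attached to it above
# by psd.associate_psds_to_multi_ifo_segments, folding the noise weighting
# of the matched filter into the data once, rather than once per template.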
# The following dicts are used to initialize and feed the event manager
ifo_out_types = {
'time_index': int,
'ifo': int, # IFO is stored as an int internally!
@@ -301,17 +346,18 @@ with ctx:
'coherent_snr': None,
'null_snr': None,
'nifo': None,
'my_network_chisq': None,
'reweighted_snr': None,
'slide_id': None
}
network_names = sorted(network_out_vals.keys())
event_mgr = EventManagerCoherent(
args, args.instruments, ifo_names,
[ifo_out_types[n] for n in ifo_names], network_names,
[network_out_types[n] for n in network_names],
segments=segments, time_slides=time_slides)
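# (The event manager collects, for every trigger, the per-ifo quantities
# declared in ifo_out_vals and the network-wide quantities declared in
# network_out_vals; they are presumably written to args.output at the end
# of the run, outside this diff.)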

# Template bank: filtering and thinning
logging.info("Read in template bank")
bank = waveform.FilterBank(
args.bank_file, flen, delta_f, complex64, low_frequency_cutoff=flow,
@@ -327,7 +373,7 @@ with ctx:
if not len(bank) == n_bank:
n_bank = len(bank)
logging.info("Template bank size after thinning: %d", n_bank)

# Antenna patterns
antenna_patterns = [[[0 for i in range(2)] for position_index in positions_array] for i in range(len(args.instruments))]
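# Sketch of how each (F+, Fx) entry is presumably filled in the loop below;
# Detector.antenna_pattern is the real pycbc.detector API, polarization=0
# is an assumption for illustration:
#   from pycbc.detector import Detector
#   fp, fc = Detector(ifo).antenna_pattern(
#       sky_positions[0][position_index], sky_positions[1][position_index],
#       polarization=0, t_gps=t_gps)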
for i, ifo in enumerate(args.instruments):
@@ -344,7 +390,7 @@ with ctx:
for s_num,stilde in enumerate(segments[args.instruments[0]]):
stilde = {ifo : segments[ifo][s_num] for ifo in args.instruments}
# Filter check checks the 'inj_filter_rejector' options to
# determine whether to filter this template/segment
# if injections are present.
analyse_segment = True
for ifo in args.instruments:
@@ -361,45 +407,52 @@ with ctx:
ifo : template.sigmasq(segments[ifo][s_num].psd)
for ifo in args.instruments}
sigma = {ifo : np.sqrt(sigmasq[ifo]) for ifo in args.instruments}
# Every time s_num is zero or we skip the segment, we run new
# template to increment the template index
if s_num==0:
event_mgr.new_template(tmplt=template.params, sigmasq=sigmasq)
if not analyse_segment: continue
logging.info(
"Analyzing segment %d/%d", s_num + 1, len(segments[ifo]))
# The following dicts are created to store copies of the
# matched filtering results computed below
snr_dict = dict.fromkeys(args.instruments)
norm_dict = dict.fromkeys(args.instruments)
corr_dict = dict.fromkeys(args.instruments)
idx = dict.fromkeys(args.instruments)
snrv_dict = dict.fromkeys(args.instruments)
snr = dict.fromkeys(args.instruments)
# FIXME: 2 lines that can be moved outside the loops
# We do not really use ifo_list, and we reassign
# nifo identically a few lines below
ifo_list = args.instruments[:]
nifo = len(ifo_list)
for ifo in args.instruments:
logging.info(
"Filtering template %d/%d, ifo %s", t_num + 1, n_bank, ifo)
# No clustering in the coherent search until the end.
# The correlation vector is the FFT of the snr (so inverse
# FFT it to get the snr).
# The following lines unpack & store copies of the matched
# filtering results for the current segment and template.
snr_ts, norm, corr, ind, snrv = \
matched_filter[ifo].matched_filter_and_cluster(
s_num, template.sigmasq(stilde[ifo].psd), window=0)
# snr time series for each ifo
snr_dict[ifo] = (
snr_ts[matched_filter[ifo].segments[s_num].analyze]
* norm)
norm_dict[ifo] = norm
# The correlation vector for each ifo. It is the FFT of the snr
# (so inverse FFT it to get the snr).
corr_dict[ifo] = corr.copy()
# Trigger indices list for each ifo
idx[ifo] = ind.copy()
# Trigger snr list for each ifo
snrv_dict[ifo] = snrv.copy()
# Normalized trigger snr list for each ifo
snr[ifo] = snrv * norm
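# (Note the distinction: snr_dict[ifo] is the full normalized SNR time
# series over the analyzed segment, while snr[ifo] holds normalized SNR
# values only at the trigger indices idx[ifo].)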

# FIXME: wrong comment?

# Move onto next segment if there are no triggers.
n_trigs = [len(snrv_dict[ifo]) for ifo in args.instruments]
if not any(n_trigs):
continue
# Short timeslides start here!
# Loop through slides, starting with the zero-lag
for slide in range(args.num_slides + 1):
logging.info(
@@ -411,8 +464,8 @@ with ctx:
# Even if we have none, need to keep an empty dictionary.
# Only do this if idx doesn't get time shifted out of the
# time we are looking at, i.e., require
# idx[ifo] - time_delay_idx[slide][position_index][ifo]
# to be in (0, len(snr_dict[ifo]))
idx_dict = {
ifo: idx[ifo][
np.logical_and(
@@ -422,7 +475,7 @@
]
for ifo in args.instruments
}

# Find triggers that are coincident (in geocent time) in
# multiple ifos. If a single ifo analysis then just use the
# indexes from that ifo.
