Skip to content

Commit

Permalink
Merge pull request #9 from gwastro/master
Browse files Browse the repository at this point in the history
Updating fork
  • Loading branch information
maxtrevor authored Sep 17, 2020
2 parents 88bdfff + 75799c2 commit 7416f12
Show file tree
Hide file tree
Showing 67 changed files with 2,398 additions and 832 deletions.
15 changes: 9 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
FROM centos:centos7

COPY docker/.singularity.d /.singularity.d
ADD docker/etc/profile.d/pycbc.sh /etc/profile.d/pycbc.sh
ADD docker/etc/profile.d/pycbc.csh /etc/profile.d/pycbc.csh
ADD docker/etc/cvmfs/default.local /etc/cvmfs/default.local
Expand All @@ -16,10 +15,6 @@ RUN rpm -ivh http://software.ligo.org/lscsoft/scientific/7/x86_64/production/l/l

# set up environment
RUN cd / && \
ln -s .singularity.d/actions/exec .exec && \
ln -s .singularity.d/actions/run .run && \
ln -s .singularity.d/actions/test .shell && \
ln -s .singularity.d/runscript singularity && \
mkdir -p /cvmfs/config-osg.opensciencegrid.org /cvmfs/oasis.opensciencegrid.org /cvmfs/gwosc.osgstorage.org && echo "config-osg.opensciencegrid.org /cvmfs/config-osg.opensciencegrid.org cvmfs ro,noauto 0 0" >> /etc/fstab && echo "oasis.opensciencegrid.org /cvmfs/oasis.opensciencegrid.org cvmfs ro,noauto 0 0" >> /etc/fstab && echo "gwosc.osgstorage.org /cvmfs/gwosc.osgstorage.org cvmfs ro,noauto 0 0" >> /etc/fstab && mkdir -p /oasis /scratch /projects /usr/lib64/slurm /var/run/munge && \
groupadd -g 1000 pycbc && useradd -u 1000 -g 1000 -d /opt/pycbc -k /etc/skel -m -s /bin/bash pycbc

Expand All @@ -29,11 +24,19 @@ RUN yum install -y libibverbs libibverbs-devel libibmad libibmad-devel libibumad
cd / && rm -rf /tmp/mvapich2-2.1 && \
pip install schwimmbad && \
MPICC=/opt/mvapich2-2.1/bin CFLAGS='-I /opt/mvapich2-2.1/include -L /opt/mvapich2-2.1/lib -lmpi' pip install --no-cache-dir mpi4py
RUN echo "/opt/mvapich2-2.1/lib" > /etc/ld.so.conf.d/mvaapich2-2.1.conf
RUN echo "/opt/mvapich2-2.1/lib" > /etc/ld.so.conf.d/mvapich2-2.1.conf

# Now update all of our library installations
RUN rm -f /etc/ld.so.cache && /sbin/ldconfig

# Explicitly set the path so that it is not inherited from the build environment
ENV PATH "/usr/local/bin:/usr/bin:/bin:/opt/mvapich2-2.1/bin"

# Set the default LAL_DATA_PATH to point at CVMFS first, then the container.
# Users wanting it to point elsewhere should start docker using:
# docker <cmd> -e LAL_DATA_PATH="/my/new/path"
ENV LAL_DATA_PATH "/cvmfs/oasis.opensciencegrid.org/ligo/sw/pycbc/lalsuite-extra/current/share/lalsimulation:/opt/pycbc/pycbc-software/share/lal-data"

# When the container is started with
# docker run -it pycbc/pycbc-el7:latest
# the default is to start a login shell as the pycbc user.
Expand Down
211 changes: 145 additions & 66 deletions bin/hdfcoinc/pycbc_coinc_findtrigs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,18 @@ if args.use_maxalpha:
det0, det1 = detector.Detector(trigs0.ifo), detector.Detector(trigs1.ifo)
time_window = det0.light_travel_time_to_detector(det1) + args.coinc_threshold

# Calculate the total decimation factors for each step
total_factors = [1]
threshes = [numpy.inf]
for decstr in args.loudest_keep_values:
thresh, factor = decstr.split(':')
if int(factor) == 1:
continue
threshes.append(float(thresh))
# throws an error if 'factor' is not the string representation
# of an integer
total_factors.append(total_factors[-1] * int(factor))

if args.timeslide_interval is not None and \
time_window >= args.timeslide_interval:
raise parser.error("The maximum time delay between detectors should be "
Expand Down Expand Up @@ -239,94 +251,161 @@ for tnum in template_ids:
logging.info('Calculating Single Detector Statistic')
s0g, s1g = rank_method.single(trigs0), rank_method.single(trigs1)

# Test whether s0g and s1g are single arrays or record arrays
# this depends on the stat being used
try:
s0gstat = s0g['snglstat'].copy()
s1gstat = s1g['snglstat'].copy()
except IndexError:
s0gstat = s0g.copy()
s1gstat = s1g.copy()

s0gsort = numpy.argsort(s0gstat)
s1gsort = numpy.argsort(s1gstat)

if not rank_method.single_increasing:
s0gsort = s0gsort[::-1]
s1gsort = s1gsort[::-1]

# Loop over the single triggers and calculate the coincs they can
# form
start0 = 0
while start0 < len(s0g):
start1 = 0

end0 = start0 + args.batch_singles
if end0 > len(s0g):
end0 = len(s0g)

s0gid = s0gsort[start0:end0]

# Set the local parts of the single information we'll use
tid0 = tid0g[s0gid]
s0 = s0g[s0gid]
t0 = t0g[s0gid]

s1lims = {}
s1lower = {}
for kidx in range(1, len(threshes)):
# For each trigger in detector 0 get the limit in detector 1 to pass
# the current decimation threshold
s1lims[kidx] = rank_method.coinc_lim_for_thresh(s0, threshes[kidx])
if not rank_method.single_increasing:
s1lims[kidx] *= -1.
# subtract small amount to account for errors due to rounding
s1lims[kidx] -= 1e-6
# Get the minimum statistic required for all triggers at the
# current decimation threshold
s1lower[kidx] = s1lims[kidx].min()

while start1 < len(s1g):

end0 = start0 + args.batch_singles
end1 = start1 + args.batch_singles
if end0 > len(s0g):
end0 = len(s0g)
if end1 > len(s1g):
end1 = len(s1g)

# Set the local parts of the single information we'll use
tid0 = tid0g[start0:end0]
tid1 = tid1g[start1:end1]
s0 = s0g[start0:end0]
s1 = s1g[start1:end1]
t0 = t0g[start0:end0]
t1 = t1g[start1:end1]

i0, i1, slide = coinc.time_coincidence(t0, t1, time_window,
args.timeslide_interval)
s1gid = s1gsort[start1:end1]

logging.info('Coincident Trigs: %s' % (len(i1)))
# Set the local parts of the single information we'll use
tid1 = tid1g[s1gid]
s1 = s1g[s1gid]
t1 = t1g[s1gid]

# Do time coincidence for slides that will be kept after all decimation
curr_shift = args.timeslide_interval * total_factors[-1]
i0, i1, slide = coinc.time_coincidence(t0, t1, time_window, curr_shift)
slide *= total_factors[-1]

logging.info('Calculating Multi-Detector Combined Statistic: %s, %s', end0, end1)
c = rank_method.coinc(s0[i0], s1[i1], slide,
args.timeslide_interval)
c = rank_method.coinc(s0[i0], s1[i1], slide, args.timeslide_interval)

#index values of the zerolag triggers
# index values of the zerolag triggers
fi = numpy.where(slide == 0)[0]

#index values of the background triggers
# index values of the background triggers
bi = numpy.where(slide != 0)[0]
logging.info('%s foreground triggers' % len(fi))
logging.info('%s background triggers' % len(bi))

# coincs will be decimated by successive (multiplicative) levels
# tracked by 'total_factor'
bi_dec = bi.copy()
dec = numpy.ones(len(bi))

total_factor = 1
for decstr in args.loudest_keep_values:
thresh, factor = decstr.split(':')
thresh = float(thresh)
# throws an error if 'factor' is not the string representation
# of an integer
total_factor *= int(factor)

# triggers not being further decimated
upper = c[bi_dec] >= thresh
idxk = bi_dec[upper]

# decimate the remaining triggers
idx = bi_dec[c[bi_dec] < thresh]
idx = idx[slide[idx] % total_factor == 0]

bi_dec = numpy.concatenate([idxk, idx])
dec = numpy.concatenate([dec[upper],
numpy.repeat(total_factor, len(idx))]
)

ti = numpy.concatenate([bi_dec, fi]).astype(numpy.uint32)
dec_fac = numpy.concatenate([dec, numpy.ones(len(fi))])
logging.info('%s after decimation' % len(ti))

# temporary storage for decimated trigger ids
g0 = i0[ti]
g1 = i1[ti]
del i0
del i1

data['stat'] += [c[ti]]
data['decimation_factor'] += [dec_fac]
data['time1'] += [t0[g0]]
data['time2'] += [t1[g1]]
data['trigger_id1'] += [tid0[g0]]
data['trigger_id2'] += [tid1[g1]]
data['timeslide_id'] += [slide[ti]]
data['template_id'] += [numpy.repeat(tnum, len(ti))]

# keep foreground triggers and background triggers below
# the lowest decimation threshold
bl = bi[c[bi] < threshes[-1]]
ti = numpy.concatenate([fi, bl])

i0 = i0[ti]
i1 = i1[ti]
c = c[ti]
slide = slide[ti]
dec = numpy.concatenate([numpy.ones(len(fi)), numpy.repeat(total_factors[-1], len(bl))])

try:
s1stat = s1['snglstat'].copy()
except IndexError:
s1stat = s1.copy()

if not rank_method.single_increasing:
s1stat *= -1.

# Starting from the largest decimation threshold, find the first decimation step
# where the loudest single detector trigger in s1 can pass the decimation threshold
# with any trigger in s0
tidx = len(threshes)
for i in range(1, len(threshes)):
if s1stat[-1] >= s1lower[kidx]:
tidx = i
break

# loop through decimation steps starting from the first step where passing
# the threshold is possible
for kidx in range(tidx, len(threshes)):

# Remove triggers in detector 1 that cannot form coincidences above
# the current decimation threshold
s1cut = numpy.searchsorted(s1stat, s1lower[kidx])

s1s = s1stat[s1cut:]
test_t1 = t1[s1cut:]
test_t0 = t0.copy()

# Do time coincidence for current decimation
curr_shift = args.timeslide_interval * total_factors[kidx - 1]
i0tmp, i1tmp, slidetmp = coinc.time_coincidence(test_t0, test_t1,
time_window, curr_shift)
slidetmp *= total_factors[kidx - 1]

# Remove foreground triggers
bitmp = numpy.where(slidetmp != 0)[0]

# remove coincidences where detector 0 has a single detector statistic
# below the limit calculated above
bitmp = bitmp[s1s[i1tmp[bitmp]] >= s1lims[kidx][i0tmp[bitmp]]]
i0tmp = i0tmp[bitmp]
i1tmp = i1tmp[bitmp] + s1cut
slidetmp = slidetmp[bitmp]

ctmp = rank_method.coinc(s0[i0tmp], s1[i1tmp],
slidetmp, args.timeslide_interval)

# Keep triggers in the current decimation range
bitmp = numpy.where(ctmp >= threshes[kidx])[0]
bitmp = bitmp[ctmp[bitmp] < threshes[kidx - 1]]

i0 = numpy.concatenate([i0, i0tmp[bitmp]])
i1 = numpy.concatenate([i1, i1tmp[bitmp]])
c = numpy.concatenate([c, ctmp[bitmp]])
slide = numpy.concatenate([slide, slidetmp[bitmp]])
dec = numpy.concatenate([dec, numpy.repeat(total_factors[kidx - 1], len(bitmp))])

data['stat'] += [c]
data['decimation_factor'] += [dec]
data['time1'] += [t0[i0]]
data['time2'] += [t1[i1]]
data['trigger_id1'] += [tid0[i0]]
data['trigger_id2'] += [tid1[i1]]
data['timeslide_id'] += [slide]
data['template_id'] += [numpy.repeat(tnum, len(i0))]

start1 += args.batch_singles
start0 += args.batch_singles


if len(data['stat']) > 0:
for key in data:
data[key] = numpy.concatenate(data[key])
Expand Down
2 changes: 1 addition & 1 deletion bin/hdfcoinc/pycbc_coinc_hdfinjfind
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ for trigger_file, injection_file in zip(args.trigger_files,

if multi_ifo_style:
for key in f['segments'].keys():
if 'foreground' in key:
if 'foreground' in key or 'coinc' in key:
continue
if key not in fo:
fo.create_group(key)
Expand Down
12 changes: 9 additions & 3 deletions bin/hdfcoinc/pycbc_coinc_mergetrigs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ parser.add_argument('--version', action='version', version=pycbc.version.git_ver
parser.add_argument('--trigger-files', nargs='+')
parser.add_argument('--output-file')
parser.add_argument('--bank-file')
parser.add_argument('--compression-level', type=int, default=6,
help='Set HDF compression level in the output file '
'(default 6)')
parser.add_argument('--verbose', '-v', action='count')
args = parser.parse_args()

Expand Down Expand Up @@ -138,8 +141,10 @@ full_boundaries = numpy.concatenate([full_boundaries, [len(trigger_hashes)]])
del trigger_hashes

idlen = (template_boundaries[1:] - template_boundaries[:-1])
f.create_dataset('%s/template_id' % ifo, data=numpy.repeat(template_ids, idlen),
compression='gzip', shuffle=True, compression_opts=9)
f.create_dataset('%s/template_id' % ifo,
data=numpy.repeat(template_ids, idlen),
compression='gzip', shuffle=True,
compression_opts=args.compression_level)
f['%s/template_boundaries' % ifo] = full_boundaries[unsort]

logging.info('reading the trigger columns from the input files')
Expand All @@ -149,7 +154,8 @@ for col in trigger_columns:
data = collect(key, args.trigger_files)[trigger_sort]
logging.info('writing %s to file' % col)
dset = f.create_dataset(key, data=data, compression='gzip',
compression_opts=9, shuffle=True)
compression_opts=args.compression_level,
shuffle=True)
del data
region(f, key, full_boundaries, unsort)
f.close()
Expand Down
30 changes: 28 additions & 2 deletions bin/hdfcoinc/pycbc_fit_sngls_binned
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,15 @@ parser.add_argument("--bin-param", required=True,
"Choose from mchirp, mtotal, template_duration or a named "
"frequency cutoff in pnutils or a frequency function in "
"LALSimulation")
parser.add_argument("--bin-spacing", choices=["linear", "log"],
parser.add_argument("--bin-spacing", choices=["linear", "log", "irregular"],
help="How to space parameter bin edges")
parser.add_argument("--num-bins", type=int,
binopt = parser.add_mutually_exclusive_group(required=True)
binopt.add_argument("--num-bins", type=int,
help="Number of regularly spaced bins to use over the "
" parameter")
binopt.add_argument("--irregular-bins",
help="Comma-separated list of parameter bin edges. "
"Required if --bin-spacing = irregular")
parser.add_argument("--bin-param-units",
help="String to display units of the binning parameter")
parser.add_argument("--approximant", default="SEOBNRv4",
Expand Down Expand Up @@ -153,6 +157,13 @@ if args.plot_dir:
args.plot_dir += '/'
plotbase = args.plot_dir + args.ifo + "-" + args.user_tag

## Check option logic
if args.bin_spacing == "irregular":
if args.irregular_bins is None:
raise RuntimeError("Must specify a list of irregular bin edges!")
else:
args.bin_edges = [float(b) for b in args.irregular_bins.split(',')]

logging.info('Opening trigger file: %s' % args.trigger_file)
trigf = h5py.File(args.trigger_file, 'r')
logging.info('Opening template file: %s' % args.bank_file)
Expand Down Expand Up @@ -287,6 +298,18 @@ else:
binpars = get_pars(args, 'bin', m1, m2, s1z, s2z)
logging.info("Parameter range of triggers: %f - %f" %
(min(binpars), max(binpars)))

# remove triggers outside irregular bins
if args.bin_spacing == "irregular":
logging.info("Removing triggers outside bin range %f - %f" %
(min(args.bin_edges), max(args.bin_edges)))
in_range = np.logical_and(binpars >= min(args.bin_edges),
binpars <= max(args.bin_edges))
binpars = binpars[in_range]
stat = stat[in_range]
tid = tid[in_range]
logging.info("%i remain" % len(binpars))

# get the bins
# we assume that parvals are all positive
assert min(binpars) >= 0
Expand All @@ -296,6 +319,9 @@ if args.bin_spacing == "linear":
pbins = bin_utils.LinearBins(pmin, pmax, args.num_bins)
elif args.bin_spacing == "log":
pbins = bin_utils.LogarithmicBins(pmin, pmax, args.num_bins)
elif args.bin_spacing == "irregular":
pbins = bin_utils.IrregularBins(args.bin_edges)

# list of bin indices
binind = [pbins[c] for c in pbins.centres()]
logging.info("Assigning trigger param values to bins")
Expand Down
Loading

0 comments on commit 7416f12

Please sign in to comment.