IWH comments
GarethCabournDavies committed Dec 13, 2023
1 parent 0e76579 commit 795ef46
Showing 7 changed files with 89 additions and 82 deletions.
9 changes: 5 additions & 4 deletions bin/minifollowups/pycbc_injection_minifollowup
@@ -182,12 +182,13 @@ trigger_times = {}
 for trig in single_triggers:
     ifo = trig.ifo
     with HFile(trig.lfn, 'r') as trig_f:
-        trigger_idx[ifo], trigger_times[ifo], trigger_snrs[ifo] = \
+        trigger_idx[ifo], data_tuple = \
             trig_f.select(
                 nearby_missedinj,
                 f'{ifo}/end_time',
                 f'{ifo}/snr',
-                return_indices=True)
+            )
+        trigger_times[ifo], trigger_snrs[ifo] = data_tuple
 
 if len(missed) < num_events:
     num_events = len(missed)
@@ -309,10 +310,10 @@ for num_event in range(num_events):
         # Finding loudest template in this detector near to the injection:
         # First, find triggers close to the missed injection
         single_fname = args.single_detector_triggers[curr_ifo]
-        idx = HFile(single_fname).select(
+        idx, _ = HFile(single_fname).select(
             lambda t: abs(t - inj_params['tc']) < args.inj_window,
             f'{curr_ifo}/end_time',
-            indices_only=True,
+            return_data=False,
         )
 
         if len(idx) == 0:
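The reworked HFile.select used above always returns the indices of passing
elements first and the selected data second. A minimal sketch of the new
calling convention (the file name 'H1-TRIGGERS.hdf' and its dataset layout
are hypothetical):

    from pycbc.io.hdf import HFile

    with HFile('H1-TRIGGERS.hdf', 'r') as trig_f:
        # Indices come back first; data comes back as a tuple holding
        # one array per requested dataset.
        idx, (end_time, snr) = trig_f.select(
            lambda t, s: s > 6,   # one argument per requested dataset
            'end_time',
            'snr',
            group='H1',
        )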
3 changes: 2 additions & 1 deletion bin/minifollowups/pycbc_page_snglinfo
@@ -159,7 +159,8 @@ else:
     # Name would be too long - just call it ranking statistic
     stat_name = 'Ranking Statistic'
     stat_name_long = ' with '.join(
-        [args.ranking_statistic, args.sngl_ranking])
+        [args.ranking_statistic, args.sngl_ranking]
+    )
 
 headers.append(stat_name)

4 changes: 2 additions & 2 deletions bin/minifollowups/pycbc_plot_trigger_timeseries
@@ -61,11 +61,11 @@ for ifo in args.single_trigger_files.keys():
 
     # Identify trigger idxs within window of trigger time
     with HFile(args.single_trigger_files[ifo], 'r') as data:
-        idx = data.select(
+        idx, _ = data.select(
             lambda endtime: abs(endtime - t) < args.window,
             'end_time',
             group=ifo,
-            indices_only=True
+            return_data=False,
         )
         data_mask = numpy.zeros(data[ifo]['snr'].size, dtype=bool)
         data_mask[idx] = True
18 changes: 11 additions & 7 deletions bin/plotting/pycbc_plot_singles_timefreq
@@ -105,13 +105,17 @@ def rough_filter(snr, chisq, chisq_dof, end_time, tmp_id, tmp_dur):
     return np.logical_and(end_time > opts.gps_start_time,
                           end_time < opts.gps_end_time + tmp_dur)
 
-indices, snr, chisq, chisq_dof, end_time, template_ids, template_duration = \
-    trig_f.select(rough_filter, opts.detector + '/snr',
-                  opts.detector + '/chisq', opts.detector + '/chisq_dof',
-                  opts.detector + '/end_time',
-                  opts.detector + '/template_id',
-                  opts.detector + '/template_duration',
-                  return_indices=True)
+indices, data_tuple = trig_f.select(
+    rough_filter,
+    'snr',
+    'chisq',
+    'chisq_dof',
+    'end_time',
+    'template_id',
+    'template_duration',
+    group=opts.detector
+)
+snr, chisq, chisq_dof, end_time, template_ids, template_duration = data_tuple
 
 if len(indices) > 0:
     if opts.veto_file:
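The call above also drops the per-dataset detector prefix in favour of the
group keyword. Under the reworked select, the two spellings below address
the same datasets; a minimal sketch (an already-open HFile trig_f and an
'H1' group are assumed):

    # Explicit path to the dataset:
    idx, (snr,) = trig_f.select(lambda snr: snr > 6, 'H1/snr')

    # Equivalent, using the group keyword:
    idx, (snr,) = trig_f.select(lambda snr: snr > 6, 'snr', group='H1')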
16 changes: 8 additions & 8 deletions bin/plotting/pycbc_plot_singles_vs_params
@@ -90,11 +90,11 @@ if opts.min_snr:
     n_triggers_orig = trig_file[f'{opts.detector}/snr'].size
     logging.info("Trigger file has %d triggers", n_triggers_orig)
     logging.info('Generating trigger mask (on SNR)')
-    idx = trig_file.select(
+    idx, _ = trig_file.select(
         lambda snr: snr >= opts.min_snr,
         'snr',
         group=opts.detector,
-        indices_only=True,
+        return_data=False,
     )
     logging.info('%d triggers after snr mask', idx.size)
     data_mask = np.zeros(n_triggers_orig, dtype=bool)
@@ -139,12 +139,12 @@ if opts.max_y is not None:
     x = x[mask]
     y = y[mask]
 
-title = '%s of %s triggers over %s and %s' % (opts.z_var.title(),
-    opts.detector, opts.x_var.title(), opts.y_var.title())
-fig_caption = ("This plot shows the %s of single detector triggers for the %s "
-               "detector. %s is shown on the colorbar axis against %s and %s "
-               "on the x- and y-axes." % (opts.z_var, opts.detector,
-               opts.z_var.title(), opts.x_var, opts.y_var))
+title = f'{opts.z_var.title()} of {opts.detector} triggers ' + \
+    f'over {opts.x_var.title()} and {opts.y_var.title()}'
+fig_caption = f"This plot shows the {opts.z_var} of single detector " + \
+    f"triggers for the {opts.detector} detector. " + \
+    f"{opts.z_var.title()} is shown on the colorbar axis " + \
+    f"against {opts.x_var} and {opts.y_var} on the x- and y-axes."
 
 if not any(mask):
     # All triggers removed - make a blank plot which says so:
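Since select also accepts a premask (see the pycbc/io/hdf.py changes
below), the indices from an indices-only pass such as the SNR cut above
can seed later reads. A hedged sketch with hypothetical names, assuming an
open HFile trig_file; per the implementation, the premask may be boolean
or an array of indices:

    import numpy as np

    # First pass: indices only, no data read back.
    idx, _ = trig_file.select(
        lambda snr: snr >= 6.0,
        'snr',
        group='H1',
        return_data=False,
    )
    data_mask = np.zeros(trig_file['H1/snr'].size, dtype=bool)
    data_mask[idx] = True

    # Second pass: only elements where the premask is True are read
    # and tested.
    _, (end_time,) = trig_file.select(
        lambda t: np.ones_like(t, dtype=bool),
        'end_time',
        group='H1',
        premask=data_mask,
    )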
8 changes: 4 additions & 4 deletions examples/gw150914/PyCBCInspiral.ipynb
@@ -909,14 +909,14 @@
    "source": [
     "h1_triggers = pycbc.io.hdf.SingleDetTriggers(\n",
     "    'H1-INSPIRAL_FULL_DATA_JOB0-1126257771-1837.hdf',\n",
-    "    'H1L1-GW150914_BANK-1126051217-3331800.hdf',\n",
-    "    None, None, None, 'H1'\n",
+    "    'H1',\n",
+    "    bank_file='H1L1-GW150914_BANK-1126051217-3331800.hdf',\n",
     ")\n",
     "\n",
     "l1_triggers = pycbc.io.hdf.SingleDetTriggers(\n",
     "    'L1-INSPIRAL_FULL_DATA_JOB0-1126258302-1591.hdf',\n",
-    "    'H1L1-GW150914_BANK-1126051217-3331800.hdf',\n",
-    "    None, None, None, 'L1'\n",
+    "    'L1',\n",
+    "    bank_file='H1L1-GW150914_BANK-1126051217-3331800.hdf',\n",
     ")\n",
     "\n",
     "imax = np.argmax(h1_triggers.snr)\n",
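With keyword defaults in the current SingleDetTriggers signature, the
positional None placeholders of the old call are no longer needed, and the
filtering options touched in pycbc/io/hdf.py below can be passed directly.
A sketch; the ranking name 'newsnr' is an assumption about the keys of
pycbc.events.ranking.sngls_ranking_function_dict:

    import pycbc.io.hdf

    h1_loud = pycbc.io.hdf.SingleDetTriggers(
        'H1-INSPIRAL_FULL_DATA_JOB0-1126257771-1837.hdf',
        'H1',
        bank_file='H1L1-GW150914_BANK-1126051217-3331800.hdf',
        filter_rank='newsnr',    # assumed ranking key
        filter_threshold=6.0,
    )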
113 changes: 57 additions & 56 deletions pycbc/io/hdf.py
@@ -29,7 +29,8 @@
 class HFile(h5py.File):
     """ Low level extensions to the capabilities of reading an hdf5 File
     """
-    def select(self, fcn, *args, **kwds):
+    def select(self, fcn, *args, chunksize=10**6, derived=None, group='',
+               return_data=True, premask=None):
         """ Return arrays from an hdf5 file that satisfy the given function
 
         Parameters
@@ -42,45 +43,48 @@ def select(self, fcn, *args, **kwds):
             A variable number of strings that are keys into the hdf5. These must
             refer to arrays of equal length.
-        chunksize : {1e6, int}, optional
+        chunksize : {10**6, int}, optional
             Number of elements to read and process at a time.
         derived : dictionary
-            Dictionary keyed on function, values are the list of required
-            datasets. If giving dataset outputs, these will be added at the
-            end. The function must take in a dictionary keyed on dataset names.
+            Dictionary keyed on argument name (which must be given in args);
+            each value is a tuple of the function to be computed and the
+            list of required datasets. The function must take in a
+            dictionary keyed on those dataset names.
         group : string, optional
             The group within the h5py file containing the datasets, e.g. in
             standard offline merged trigger files, this would be the IFO. This
             can be included in the args manually, but is required in the case
             of derived functions, e.g. newsnr.
-        return_indices : bool, optional
-            If True, also return the indices of elements passing the function.
-        indices_only : bool, optional
-            If True, only return the indices of elements passing the function.
+        return_data : bool, optional, default True
+            If True, return the data for elements passing the function.
         premask : array of boolean values, optional
             The pre-mask to apply to the triggers at read-in.
 
         Returns
         -------
-        values : np.ndarrays
-            A variable number of arrays depending on the number of keys into
-            the hdf5 file that are given. If return_indices is True, the first
-            element is an array of indices of elements passing the function.
+        indices : np.ndarray
+            An array of indices of elements passing the function.
+        return_tuple : tuple of np.ndarrays or None
+            A variable number of arrays depending on the number of args
+            provided. If return_data is True, the arrays are the values of
+            each arg; if return_data is False, this is None.
 
         >>> f = HFile(filename)
         >>> snr = f.select(lambda snr: snr > 6, 'H1/snr')
         """
 
         # Required datasets are the arguments requested and datasets given
         # for any derived functions
-        derived = kwds.get('derived', {})
-        dsets = list(args)
-        for rqd_list in derived.values():
+        derived = derived if derived is not None else {}
+        dsets = [a for a in list(args) if a not in derived]
+        for _, rqd_list in derived.values():
             dsets += rqd_list
 
         # remove any duplicates from req_dsets
@@ -90,7 +94,6 @@ def select(self, fcn, *args, **kwds):
         # check they can all be used together
         refs = {}
         size = None
-        group = kwds.get('group', '')
         for ds in dsets:
             refs[ds] = self[group + '/' + ds]
             if (size is not None) and (refs[ds].size != size):
@@ -99,13 +102,11 @@ def select(self, fcn, *args, **kwds):
                                    f"previous input datasets ({size}).")
             size = refs[ds].size
 
-        # To conserve memory read the array in chunks
-        chunksize = kwds.get('chunksize', int(1e6))
-
-        if 'premask' not in kwds or kwds.get('premask') is None:
+        # Apply any pre-masks
+        if premask is None:
             mask = np.ones(size, dtype=bool)
         else:
-            mask = kwds['premask']
+            mask = premask
 
         if not mask.dtype == bool:
             # mask is an array of indices rather than booleans,
@@ -118,19 +119,13 @@ def select(self, fcn, *args, **kwds):
             raise RuntimeError(f"Using premask of size {mask.size} which "
                                f"does not match the input datasets ({size}).")
 
-        # This will be the outputs:
-        return_indices = kwds.get('return_indices', False)
-        indices_only = kwds.get('indices_only', False)
-
-        # Arguments being returned:
-        # The name doesn't matter, so key on the function of
-        # derived datasets
-        ret_args = args + tuple(derived.keys())
+        # datasets being returned (possibly)
         data = {}
         indices = np.array([], dtype=np.uint64)
-        for arg in ret_args:
+        for arg in args:
             data[arg] = []
 
+        # Loop through the chunks:
         i = 0
         while i < size:
             r = i + chunksize if i + chunksize < size else size
@@ -143,32 +138,36 @@ def select(self, fcn, *args, **kwds):
             # Read each chunk's worth of data
             partial_data = {arg: refs[arg][i:r][mask[i:r]]
                             for arg in dsets}
-            partial = [partial_data[a] for a in args]
-            partial += [func(partial_data) for func in derived.keys()]
+            partial = []
+            for a in args:
+                if a in derived.keys():
+                    # If this is a derived dataset, calculate it
+                    derived_fcn = derived[a][0]
+                    partial += [derived_fcn(partial_data)]
+                else:
+                    # otherwise, just read from the file
+                    partial += [partial_data[a]]
 
             # Find where it passes the function
             keep = fcn(*partial)
-            if return_indices or indices_only:
-                indices = np.concatenate([indices, np.flatnonzero(keep) + i])
 
-            # Store only the results that pass the function
-            for arg, part in zip(ret_args, partial):
-                if not indices_only:
+            # Keep the indices which pass the function:
+            indices = np.concatenate([indices, np.flatnonzero(keep) + i])
+
+            if return_data:
+                # Store the dataset results that pass the function
+                for arg, part in zip(args, partial):
                     data[arg].append(part[keep])
 
             i += chunksize
 
-        return_tuple = tuple()
         # Combine the partial results into full arrays
-        if indices_only or return_indices:
-            return_tuple += (indices.astype(np.uint64),)
-        if not indices_only:
-            return_tuple += tuple(np.concatenate(data[arg])
-                                  for arg in ret_args)
-
-        if len(return_tuple) == 1:
-            return return_tuple[0]
+        if return_data:
+            return_tuple = tuple(np.concatenate(data[arg])
+                                 for arg in args)
         else:
-            return return_tuple
+            return_tuple = None
+
+        return indices.astype(np.uint64), return_tuple
 
 
 class DictArray(object):
@@ -471,7 +470,7 @@ class SingleDetTriggers(object):
     """
     def __init__(self, trig_file, detector, bank_file=None, veto_file=None,
                  segment_name=None, premask=None, filter_rank=None,
-                 filter_threshold=None, chunksize=int(1e6), filter_func=None):
+                 filter_threshold=None, chunksize=10**6, filter_func=None):
         """
         Create a SingleDetTriggers instance
@@ -503,7 +502,7 @@ def __init__(self, trig_file, detector, bank_file=None, veto_file=None,
         filter_threshold: float, required if filter_rank is used
             Threshold to filter the ranking values
-        chunksize : int , default 1e6
+        chunksize : int , default 10**6
             Size of chunks to read in for the filter_rank / threshold.
         """
         logging.info('Loading triggers')
Expand All @@ -527,11 +526,13 @@ def __init__(self, trig_file, detector, bank_file=None, veto_file=None,
assert filter_threshold is not None
logging.info("Applying threshold of %.3f on %s",
filter_threshold, filter_rank)
idx = self.trigs_f.select(
fcn_dsets = (ranking.sngls_ranking_function_dict[filter_rank],
ranking.required_datasets[filter_rank])
idx, _ = self.trigs_f.select(
lambda rank: rank > filter_threshold,
derived={ranking.sngls_ranking_function_dict[filter_rank]:
ranking.required_datasets[filter_rank]},
indices_only=True,
filter_rank,
derived={filter_rank: fcn_dsets},
return_data=False,
premask=self.mask,
group=detector,
chunksize=chunksize,
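To make the new derived format concrete: each derived quantity is named in
args, and its entry in derived maps that name to a (function, required
datasets) tuple, exactly as fcn_dsets is built above. A hedged sketch with
a toy ranking function (hypothetical file and group; real code would use
pycbc.events.ranking):

    import numpy as np
    from pycbc.io.hdf import HFile

    def toy_rank(data):
        # Receives a dictionary keyed on the required dataset names.
        rchisq = np.maximum(1.0, data['chisq'] / data['chisq_dof'])
        return data['snr'] / rchisq ** 0.5

    with HFile('H1-TRIGGERS.hdf', 'r') as f:
        idx, (rank,) = f.select(
            lambda r: r > 6,
            'rank',              # derived argument, named in args
            group='H1',
            derived={'rank': (toy_rank, ['snr', 'chisq', 'chisq_dof'])},
        )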
