Skip to content

Commit

Permalink
[pycbc live] Allowing the use of psd variation in the ranking statist…
Browse files Browse the repository at this point in the history
…ic for pycbc live (gwastro#4533)

* Modifying files to include psd variation in single detector statistic calculation

* ending variation.py with a blank line

* Changing to an increment agnostic solution

* removing change already fixed

* Updating function names and docstrings

* removing ToDos and adding more helpful comments

* Removing unused import

* Codeclimate fixes

* Removing excess logging and whitespace mistakes

* Removing unused objects + codeclimate fixes

* Updating comments and docstrings, removing matchedfilter changes

* Revert "Updating comments and docstrings, removing matchedfilter changes"

This reverts commit 0e6473a.

* Removing matchedfilter changes, updating comments and docstrings

* Move --verbose to the end of the commands

* more comment updates

* Repositioning filter recreation

* Changes to comments and removing whitespace

Co-authored-by: Thomas Dent <thomas.dent@usc.es>

* removing refchecks

* Adding option verification for psd variation

* Apply suggestions from code review

Co-authored-by: Thomas Dent <thomas.dent@usc.es>

* fixing EOL error

* Refactoring the filter creation function

* codeclimate fixes

* undo

* full_filt func

* removing indentation

* code climate

* code climate

* try to quiet codeclimate

* codeclimate doesn't know PEP8

* brackets obviate line continuation

---------

Co-authored-by: Thomas Dent <thomas.dent@usc.es>
  • Loading branch information
2 people authored and bhooshan-gadre committed Dec 19, 2023
1 parent 76a910c commit 68a490d
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 17 deletions.
37 changes: 37 additions & 0 deletions bin/pycbc_live
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ from pycbc import mchirp_area
from pycbc.detector import ppdets
from pycbc.filter import resample
from pycbc.psd import estimate
from pycbc.psd import variation
from pycbc.live import snr_optimizer

# Use cached class-based FFTs in the resample and estimate module
Expand Down Expand Up @@ -996,6 +997,11 @@ parser.add_argument('--embright-massgap-max', type=float, default=5.0, metavar='
'HasMassGap probability.')
parser.add_argument('--skymap-only-ifos', nargs='+',
help="Detectors that only contribute in sky localization")
parser.add_argument('--psd-variation', action='store_true',
help="Run the psd variation code to produce psd variation "
"values for each single detector triggers found by "
"the search. Required when using a single detector "
"ranking statistic that includes psd variation.")

scheme.insert_processing_option_group(parser)
LiveSingle.insert_args(parser)
Expand All @@ -1010,6 +1016,7 @@ args = parser.parse_args()

scheme.verify_processing_options(args, parser)
fft.verify_fft_options(args, parser)
Coincer.verify_args(args, parser)
ifos = set(args.channel_name.keys())
analyze_singles = LiveSingle.verify_args(args, parser, ifos)

Expand Down Expand Up @@ -1162,6 +1169,11 @@ with ctx:
last_bg_dump_time = int(data_end())
psd_count = {ifo:0 for ifo in ifos}

# Create dicts to track whether the psd has been recalculated and to hold
# psd variation filters
psd_recalculated = {ifo: True for ifo in ifos}
psd_var_filts = {ifo: None for ifo in ifos}

while data_end() < args.end_time:
t1 = pycbc.gps_now()
logging.info('Analyzing from %s', data_end())
Expand All @@ -1177,6 +1189,9 @@ with ctx:
)
if status and psd_count[ifo] == 0:
status = data_reader[ifo].recalculate_psd()
# If the psd has been recalculated then we need a new
# filter for psd variation calculation
psd_recalculated[ifo] = True
psd_count[ifo] = args.psd_recompute_length - 1
elif not status:
psd_count[ifo] = 0
Expand Down Expand Up @@ -1242,6 +1257,28 @@ with ctx:
if len(results[ifo][key]):
results[ifo][key] = results[ifo][key][idx]

# Calculate and add the psd variation for the results
if args.psd_variation:

for ifo in results:
logging.info(f"Calculating PSD Variation Statistic for {ifo}")

# A new filter is needed if the PSD has been recalculated
if psd_recalculated[ifo] is True:
psd_var_filts[ifo] = variation.live_create_filter(data_reader[ifo].psd,
args.psd_segment_length,
int(args.sample_rate))
psd_recalculated[ifo] = False

psd_var_ts = variation.live_calc_psd_variation(data_reader[ifo].strain,
psd_var_filts[ifo],
args.increment)

psd_var_vals = variation.live_find_var_value(results[ifo],
psd_var_ts)

results[ifo]['psd_var_val'] = psd_var_vals

# Look for coincident triggers and do background estimation
if args.enable_background_estimation:
coinc_results = coinc_pool.broadcast(get_coinc, results)
Expand Down
3 changes: 2 additions & 1 deletion examples/live/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ python -m mpi4py `which pycbc_live` \
--max-batch-size 16777216 \
--output-path output \
--day-hour-output-prefix \
--sngl-ranking newsnr_sgveto \
--sngl-ranking newsnr_sgveto_psdvar_threshold \
--ranking-statistic phasetd \
--statistic-files statHL.hdf statHV.hdf statLV.hdf \
--sgchisq-snr-threshold 4 \
Expand All @@ -204,6 +204,7 @@ python -m mpi4py `which pycbc_live` \
--single-duration-threshold 7 \
--single-reduced-chisq-threshold 2 \
--single-fit-file single_trigger_fits.hdf \
--psd-variation \
--verbose

# If you would like to use the pso optimizer, change --optimizer to pso
Expand Down
8 changes: 8 additions & 0 deletions pycbc/events/coinc.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,14 @@ def insert_args(parser):
group.add_argument('--ifar-remove-threshold', type=float,
help="NOT YET IMPLEMENTED", default=100.0)

@staticmethod
def verify_args(args, parser):
    """Check that the PSD-variation related options agree with each other.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line options. ``sngl_ranking`` is read only when
        ``psd_variation`` is present and false.
    parser : argparse.ArgumentParser
        Parser whose ``error`` method is called when a single detector
        ranking containing 'psdvar' is requested without --psd-variation.
    """
    # A namespace without the psd_variation option is treated as if the
    # option were enabled, i.e. there is nothing to verify.
    psd_var_enabled = getattr(args, 'psd_variation', True)
    if psd_var_enabled:
        return

    if 'psdvar' in args.sngl_ranking:
        parser.error(f"The single ifo ranking stat {args.sngl_ranking} "
                     "requires --psd-variation.")

@property
def background_time(self):
"""Return the amount of background time that the buffers contain"""
Expand Down
220 changes: 204 additions & 16 deletions pycbc/psd/variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,50 @@
import numpy
from numpy.fft import rfft, irfft
import scipy.signal as sig

from scipy.interpolate import interp1d

import pycbc.psd
from pycbc.types import TimeSeries


def create_full_filt(freqs, filt, plong, srate, psd_duration):
    """Create a filter to convolve with strain data to find PSD variation.

    Parameters
    ----------
    freqs : numpy.ndarray
        Array of sample frequencies of the PSD.
    filt : numpy.ndarray
        A bandpass filter, sampled at the frequencies in `freqs`.
    plong : numpy.ndarray
        The estimated PSD.
    srate : float
        The sample rate of the data.
    psd_duration : float
        The duration of the estimated PSD.

    Returns
    -------
    full_filt : numpy.ndarray
        The full time-domain filter used to calculate PSD variation, with
        ``int(psd_duration * srate)`` samples.
    """
    # Make the weighting filter: bandpass, weight by f^-7/6 and whiten.
    # The normalization is chosen so that the variance will be one if this
    # filter is applied to white noise which already has a variance of one.
    # A zero-frequency bin yields an infinite power-law weight; that bin is
    # overwritten with zero below, so the divide warning is silenced here.
    with numpy.errstate(divide='ignore'):
        power_law = freqs ** (-7./6.)
    fweight = power_law * filt / numpy.sqrt(plong)
    fweight[0] = 0.
    norm = (numpy.sum(abs(fweight) ** 2) / (len(fweight) - 1.)) ** -0.5
    fweight = norm * fweight

    fwhiten = numpy.sqrt(2. / srate) / numpy.sqrt(plong)
    fwhiten[0] = 0.

    # numpy.roll only accepts an integer shift, so cast explicitly in case
    # the sample rate was supplied as a float.
    shift = int(int(psd_duration / 2) * srate)

    # Shift the time-domain kernel so that it is centred in the array, then
    # taper it. The window is taken from scipy.signal.windows because the
    # top-level scipy.signal.hann alias no longer exists in recent SciPy.
    window = sig.windows.hann(int(psd_duration * srate))
    full_filt = window * numpy.roll(irfft(fwhiten * fweight), shift)

    return full_filt


def mean_square(data, delta_t, srate, short_stride, stride):
""" Calculate mean square of given time series once per stride
Expand Down Expand Up @@ -154,18 +192,7 @@ def calc_filt_psd_variation(strain, segment, short_segment, psd_long_segment,
freqs = numpy.array(plong.sample_frequencies, dtype=fs_dtype)
plong = plong.numpy()

# Make the weighting filter - bandpass, which weight by f^-7/6,
# and whiten. The normalization is chosen so that the variance
# will be one if this filter is applied to white noise which
# already has a variance of one.
fweight = freqs ** (-7./6.) * filt / numpy.sqrt(plong)
fweight[0] = 0.
norm = (sum(abs(fweight) ** 2) / (len(fweight) - 1.)) ** -0.5
fweight = norm * fweight
fwhiten = numpy.sqrt(2. / srate) / numpy.sqrt(plong)
fwhiten[0] = 0.
full_filt = sig.hann(int(psd_duration * srate)) * numpy.roll(
irfft(fwhiten * fweight), int(psd_duration / 2) * srate)
full_filt = create_full_filt(freqs, filt, plong, srate, psd_duration)
# Convolve the filter with long segment of data
wstrain = sig.fftconvolve(astrain, full_filt, mode='same')
wstrain = wstrain[int(strain_crop * srate):-int(strain_crop * srate)]
Expand Down Expand Up @@ -206,10 +233,171 @@ def find_trigger_value(psd_var, idx, start, sample_rate):
# Extract the PSD variation at trigger time through linear
# interpolation
if not hasattr(psd_var, 'cached_psd_var_interpolant'):
from scipy import interpolate
psd_var.cached_psd_var_interpolant = \
interpolate.interp1d(psd_var.sample_times.numpy(), psd_var.numpy(),
fill_value=1.0, bounds_error=False)
interp1d(psd_var.sample_times.numpy(),
psd_var.numpy(),
fill_value=1.0,
bounds_error=False)
vals = psd_var.cached_psd_var_interpolant(time)

return vals


def live_create_filter(psd_estimated,
                       psd_duration,
                       sample_rate,
                       low_freq=20,
                       high_freq=480):
    """
    Create a filter to be used in the calculation of the psd variation for the
    PyCBC Live search. This filter combines a bandpass between a lower and
    upper frequency and an estimated signal response so that the variance
    will be 1 when the filter is applied to white noise.

    Within the PyCBC Live search this filter needs to be recreated every time
    the estimated psd is updated and needs to be unique for each detector.

    Parameters
    ----------
    psd_estimated : pycbc.frequencyseries
        The current PyCBC Live PSD: variations are measured relative to this
        estimate.
    psd_duration : float
        The duration of the estimation of the psd, in seconds.
    sample_rate : int
        The sample rate of the strain data being searched over.
    low_freq : int (default = 20)
        The lower frequency to apply in the bandpass filter.
    high_freq : int (default = 480)
        The upper frequency to apply in the bandpass filter.

    Returns
    -------
    full_filt : numpy.ndarray
        The complete filter to be convolved with the strain data to
        find the psd variation value.
    """
    # Create a bandpass filter between low_freq and high_freq, with
    # 4 * sample_rate taps. The sampling frequency is passed through `fs`
    # because the `nyq` keyword was removed from recent SciPy releases.
    bandpass = sig.firwin(4 * sample_rate,
                          [low_freq, high_freq],
                          pass_zero=False,
                          window='hann',
                          fs=sample_rate)

    # Zero-pad (or truncate) the taps to the number of samples in the PSD
    # segment. A new array is filled rather than resizing in place, since
    # in-place resizing fails if anything else references the array.
    n_samples = int(psd_duration * sample_rate)
    padded = numpy.zeros(n_samples, dtype=bandpass.dtype)
    n_copy = min(n_samples, len(bandpass))
    padded[:n_copy] = bandpass[:n_copy]

    # Fourier transform the filter and take the absolute value to get
    # rid of the phase.
    filt = abs(rfft(padded))

    # Extract the psd frequencies to create a representative filter.
    freqs = numpy.array(psd_estimated.sample_frequencies, dtype=numpy.float32)
    plong = psd_estimated.numpy()
    full_filt = create_full_filt(freqs, filt, plong, sample_rate, psd_duration)

    return full_filt


def live_calc_psd_variation(strain,
                            full_filt,
                            increment,
                            data_trim=2.0,
                            short_stride=0.25):
    """
    Calculate the psd variation in the PyCBC Live search.

    The Live strain data is convolved with the filter to produce a timeseries
    containing the PSD variation values for each sample. The mean square of
    the timeseries is calculated over the short_stride to find outliers caused
    by short duration glitches. Outliers are replaced with the average of
    adjacent elements in the array. This array is then further averaged every
    second to produce the PSD variation timeseries.

    Parameters
    ----------
    strain : pycbc.timeseries
        Live data being searched through by the PyCBC Live search.
    full_filt : numpy.ndarray
        A filter created by `live_create_filter`.
    increment : float
        The number of seconds in each increment in the PyCBC Live search.
    data_trim : float
        The number of seconds to be trimmed from either end of the convolved
        timeseries to prevent artefacts.
    short_stride : float
        The number of seconds to average the PSD variation timeseries over to
        remove the effects of short duration glitches.

    Returns
    -------
    psd_var : pycbc.timeseries
        A timeseries containing the PSD variation values.
    """
    sample_rate = int(strain.sample_rate)

    # Grab the last increment's worth of data, plus padding for edge effects.
    seg_start = strain.end_time - increment - (data_trim * 3)
    astrain = strain.time_slice(seg_start, strain.end_time)

    # Convolve the data and the filter to produce the PSD variation timeseries,
    # then trim the beginning and end of the data to prevent edge effects.
    # The end index is computed explicitly: a slice ending at -0 would be
    # empty when data_trim is zero.
    wstrain = sig.fftconvolve(astrain, full_filt, mode='same')
    n_trim = int(data_trim * sample_rate)
    wstrain = wstrain[n_trim:len(wstrain) - n_trim]

    # Create a PSD variation array by taking the mean square of the PSD
    # variation timeseries every short_stride
    short_ms = numpy.mean(
        wstrain.reshape(-1, int(sample_rate * short_stride)) ** 2, axis=1)

    # Replace any interior sample that is more than twice the average of its
    # two neighbours with that average.
    ave = 0.5 * (short_ms[2:] + short_ms[:-2])
    outliers = short_ms[1:-1] > (2. * ave)
    short_ms[1:-1][outliers] = ave[outliers]

    # Calculate the PSD variation every second by averaging consecutive,
    # non-overlapping groups of (1/short_stride) short_ms samples.
    samples_per_second = 1 / short_stride
    n_seconds = int(len(short_ms) / samples_per_second)
    m_s = numpy.array(
        [numpy.mean(short_ms[int(samples_per_second * idx):
                             int(samples_per_second * (idx + 1))])
         for idx in range(n_seconds)],
        dtype=wstrain.dtype)

    # The first output sample corresponds to the start of the trimmed data.
    psd_var = TimeSeries(m_s,
                         delta_t=1.0,
                         epoch=strain.end_time - increment - (data_trim * 2))

    return psd_var


def live_find_var_value(triggers,
                        psd_var_timeseries):
    """
    Look up the PSD variation at each trigger time by linear interpolation.

    Parameters
    ----------
    triggers : dict
        Dictionary of trigger properties; the 'end_time' entry supplies the
        times at which the PSD variation is evaluated.
    psd_var_timeseries : pycbc.timeseries
        A timeseries containing the PSD variation value for each second of the
        latest increment in PyCBC Live.

    Returns
    -------
    psd_var_vals : numpy.ndarray
        Array of interpolated PSD variation values at trigger times. Times
        outside the span of the timeseries are assigned a value of 1.0.
    """
    times = psd_var_timeseries.sample_times.numpy()
    values = psd_var_timeseries.numpy()

    # Out-of-range times fall back to 1.0 instead of raising an error
    lookup = interp1d(times, values, bounds_error=False, fill_value=1.0)

    return lookup(triggers['end_time'])

0 comments on commit 68a490d

Please sign in to comment.