diff --git a/bin/pycbc_live b/bin/pycbc_live
index 7ad75626ca2..a0385ec358b 100755
--- a/bin/pycbc_live
+++ b/bin/pycbc_live
@@ -1175,7 +1175,7 @@ with ctx:
         results = {}
         evnt.live_detectors = set()
 
-        # Create objects to track whether the psd has been recalculated
+        # Create a dict to track whether the PSD has been recalculated
        psd_recalculated = {ifo: False for ifo in ifos}
 
         for ifo in ifos:
diff --git a/examples/live/run.sh b/examples/live/run.sh
index 8918c5874a1..7700d65df32 100755
--- a/examples/live/run.sh
+++ b/examples/live/run.sh
@@ -2,104 +2,27 @@
 
 # example/test of running PyCBC Live on simulated data
 
-set -e
-
 export OMP_NUM_THREADS=4
 export HDF5_USE_FILE_LOCKING="FALSE"
 
-gps_start_time=1272790000
-gps_end_time=1272790512
-f_min=18
-
-
-# test if there is a template bank. If not, make one
-
-if [[ ! -f template_bank.hdf ]]
-then
-    echo -e "\\n\\n>> [`date`] Making template bank"
-    curl \
-        --remote-name \
-        --silent \
-        --show-error \
-        https://raw.githubusercontent.com/gwastro/pycbc-config/710dbfd3590bd93d7679d7822da59fcb6b6fac0f/O2/bank/H1L1-HYPERBANK_SEOBNRv4v2_VARFLOW_THORNE-1163174417-604800.xml.gz
-
-    pycbc_coinc_bank2hdf \
-        --bank-file H1L1-HYPERBANK_SEOBNRv4v2_VARFLOW_THORNE-1163174417-604800.xml.gz \
-        --output-file template_bank_full.hdf
-
-    rm -f H1L1-HYPERBANK_SEOBNRv4v2_VARFLOW_THORNE-1163174417-604800.xml.gz
-
-    pycbc_hdf5_splitbank \
-        --bank-file template_bank_full.hdf \
-        --output-prefix template_bank_ \
-        --random-sort \
-        --random-seed 831486 \
-        --templates-per-bank 50
-
-    mv template_bank_0.hdf template_bank.hdf
-    rm -f template_bank_*.hdf
-else
-    echo -e "\\n\\n>> [`date`] Pre-existing template bank found"
-fi
-
-
-# test if there is a single fits file. If not, make a representative one
-if [[ ! -f single_trigger_fits.hdf ]]
-then
-    echo -e "\\n\\n>> [`date`] Making single fits file"
-    python make_singles_fits_file.py
-else
-    echo -e "\\n\\n>> [`date`] Pre-existing single fits file found"
-fi
-
-
-# test if there is a injection file.
-# If not, make one and delete any existing strain
-
-if [[ -f injections.hdf ]]
-then
-    echo -e "\\n\\n>> [`date`] Pre-existing injections found"
-else
-    echo -e "\\n\\n>> [`date`] Generating injections"
-
-    rm -rf ./strain
-
-    ./generate_injections.py
-fi
+# Get the current time in UTC format
+current_time=$(date -u +"%Y-%m-%d %H:%M:%S")
 
+# GPS time started on January 6, 1980 (Unix timestamp: 315964800)
+gps_epoch_timestamp=315964800
 
-# test if strain files exist. If they don't, make them
+# Convert the current time to seconds since the GPS epoch (leap seconds ignored)
+gps_start_time=$(($(date -u -d "$current_time" +"%s") - $gps_epoch_timestamp))
+gps_end_time=$((gps_start_time + 512))
 
-if [[ ! -d ./strain ]]
-then
-    echo -e "\\n\\n>> [`date`] Generating simulated strain"
-
-    function simulate_strain { # detector PSD_model random_seed
-        mkdir -p strain/$1
-
-        out_path="strain/$1/$1-SIMULATED_STRAIN-{start}-{duration}.gwf"
+echo "GPS Start Time: $gps_start_time"
+echo "Current UTC Time: $current_time"
+echo "GPS End Time: $gps_end_time"
 
-        pycbc_condition_strain \
-            --fake-strain $2 \
-            --fake-strain-seed $3 \
-            --output-strain-file $out_path \
-            --gps-start-time $gps_start_time \
-            --gps-end-time $4 \
-            --sample-rate 16384 \
-            --low-frequency-cutoff 10 \
-            --channel-name $1:SIMULATED_STRAIN \
-            --frame-duration 32 \
-            --injection-file injections.hdf
-    }
-
-    # L1 ends 32s later, so that we can inject in single-detector time
-    simulate_strain H1 aLIGOMidLowSensitivityP1200087 1234 $((gps_end_time - 32))
-    simulate_strain L1 aLIGOMidLowSensitivityP1200087 2345 $gps_end_time
-    simulate_strain V1 AdVEarlyLowSensitivityP1200087 3456 $((gps_end_time - 32))
-
-else
-    echo -e "\\n\\n>> [`date`] Pre-existing strain data found"
-fi
+f_min=17
+CONF_DIR=/home/pycbc.live/analysis/prod/o4/full_bandwidth/
+#--bank-file ${CONF_DIR}/bank/O4_DESIGN_OPT_FLOW_HYBRID_BANK_O3_CONFIG.hdf \
 
 
 # make phase-time-amplitude histogram files, if needed
@@ -112,22 +35,18 @@
 else
     echo -e "\\n\\n>> [`date`] Pre-existing phase-time-amplitude files found"
 fi
 
-
 # delete old outputs if they exist
 rm -rf ./output
 
-
 echo -e "\\n\\n>> [`date`] Running PyCBC Live"
 
-
 mpirun \
--host localhost,localhost \
--n 2 \
---bind-to none \
- -x PYTHONPATH -x LD_LIBRARY_PATH -x OMP_NUM_THREADS -x VIRTUAL_ENV -x PATH -x HDF5_USE_FILE_LOCKING \
-\
+ -hostfile mpi_hosts.txt \
+ -n 4 \
+ -ppn 1 \
+ \
 python -m mpi4py `which pycbc_live` \
---bank-file template_bank.hdf \
+--bank-file bank_3k.hdf \
 --sample-rate 2048 \
 --enable-bank-start-frequency \
 --low-frequency-cutoff ${f_min} \
@@ -150,7 +69,7 @@ python -m mpi4py `which pycbc_live` \
 --highpass-reduction 200 \
 --psd-samples 30 \
 --max-psd-abort-distance 600 \
---min-psd-abort-distance 20 \
+--min-psd-abort-distance 68 \
 --psd-abort-difference .15 \
 --psd-recalculate-difference .01 \
 --psd-inverse-length 3.5 \
@@ -158,20 +77,21 @@ python -m mpi4py `which pycbc_live` \
 --trim-padding .5 \
 --store-psd \
 --increment-update-cache \
-  H1:strain/H1 \
-  L1:strain/L1 \
-  V1:strain/V1 \
+  H1:/dev/shm/kafka/H1_O3ReplayMDC \
+  L1:/dev/shm/kafka/L1_O3ReplayMDC \
 --frame-src \
-  H1:strain/H1/* \
-  L1:strain/L1/* \
-  V1:strain/V1/* \
---frame-read-timeout 10 \
+  H1:/dev/shm/kafka/H1_O3ReplayMDC/* \
+  L1:/dev/shm/kafka/L1_O3ReplayMDC/* \
+--frame-read-timeout 50 \
 --channel-name \
-  H1:SIMULATED_STRAIN \
-  L1:SIMULATED_STRAIN \
-  V1:SIMULATED_STRAIN \
+  H1:GDS-CALIB_STRAIN_INJ1_O3Replay \
+  L1:GDS-CALIB_STRAIN_INJ1_O3Replay \
+--state-channel \
+  H1:GDS-CALIB_STATE_VECTOR \
+  L1:GDS-CALIB_STATE_VECTOR \
 --processing-scheme cpu:4 \
 --fftw-measure-level 0 \
+--fftw-input-float-wisdom-file ${CONF_DIR}/cit/fftw_wisdom \
 --fftw-threads-backend openmp \
 --increment 8 \
 --max-batch-size 16777216 \
@@ -179,7 +99,8 @@ python -m mpi4py `which pycbc_live` \
 --day-hour-output-prefix \
 --sngl-ranking newsnr_sgveto_psdvar_threshold \
 --ranking-statistic phasetd \
---statistic-files statHL.hdf statHV.hdf statLV.hdf \
+--statistic-files \
+  statHL.hdf \
 --sgchisq-snr-threshold 4 \
 --sgchisq-locations "mtotal>40:20-30,20-45,20-60,20-75,20-90,20-105,20-120" \
 --enable-background-estimation \
@@ -187,59 +108,21 @@ python -m mpi4py `which pycbc_live` \
 --timeslide-interval 0.1 \
 --pvalue-combination-livetime 0.0005 \
 --ifar-double-followup-threshold 0.0001 \
---ifar-upload-threshold 0.0001 \
+--ifar-upload-threshold 0.0002 \
 --round-start-time 4 \
 --start-time $gps_start_time \
 --end-time $gps_end_time \
 --src-class-mchirp-to-delta 0.01 \
 --src-class-eff-to-lum-distance 0.74899 \
 --src-class-lum-distance-to-delta -0.51557 -0.32195 \
---run-snr-optimization \
---snr-opt-di-maxiter 50 \
---snr-opt-di-popsize 100 \
---snr-opt-include-candidate \
---snr-opt-seed 42 \
---sngl-ifar-est-dist conservative \
---single-newsnr-threshold 9 \
---single-duration-threshold 7 \
---single-reduced-chisq-threshold 2 \
---single-fit-file single_trigger_fits.hdf \
---verbose \
---psd-variation
-
-# If you would like to use the pso optimizer, change --optimizer to pso
-# and include these arguments while removing other optimizer args.
-# You will need to install the pyswarms package into your environment.
-# --snr-opt-pso-iters 5 \
-# --snr-opt-pso-particles 250 \
-# --snr-opt-pso-c1 0.5 \
-# --snr-opt-pso-c2 2.0 \
-# --snr-opt-pso-w 0.01 \
-
-# note that, at this point, some SNR optimization processes may still be
-# running, so the checks below may ignore their results
-
-# cat the logs of pycbc_optimize_snr so we can check them
-for opt_snr_log in `find output -type f -name optimize_snr.log | sort`
-do
-    echo -e "\\n\\n>> [`date`] Showing log of SNR optimizer, ${opt_snr_log}"
-    cat ${opt_snr_log}
-done
-
-echo -e "\\n\\n>> [`date`] Checking results"
-./check_results.py \
-    --gps-start ${gps_start_time} \
-    --gps-end ${gps_end_time} \
-    --f-min ${f_min} \
-    --bank template_bank.hdf \
-    --injections injections.hdf \
-    --detectors H1 L1 V1
-
-echo -e "\\n\\n>> [`date`] Running Bayestar"
-for XMLFIL in `find output -type f -name \*.xml\* | sort`
-do
-    pushd `dirname ${XMLFIL}`
-    bayestar-localize-coincs --f-low ${f_min} `basename ${XMLFIL}` `basename ${XMLFIL}`
-    test -f 0.fits
-    popd
-done
+--enable-profiling 1 \
+--psd-variation \
+--verbose
+
+#--sngl-ranking newsnr_sgveto_psdvar_threshold \
+#--ranking-statistic phasetd_exp_fit_fgbg_bbh_norm \
+# statHV.hdf \
+# statLV.hdf \
+# statHLV.hdf \
+# V1:Hrec_hoft_16384Hz_INJ1_O3Replay \
+# V1:DQ_ANALYSIS_STATE_VECTOR \
diff --git a/pycbc/filter/matchedfilter.py b/pycbc/filter/matchedfilter.py
index 5ca5c3deb86..616289f9c08 100644
--- a/pycbc/filter/matchedfilter.py
+++ b/pycbc/filter/matchedfilter.py
@@ -1662,7 +1662,6 @@ def process_all(self):
             veto_info = [tmp[i] for i in sort]
             result = self._process_vetoes(result, veto_info)
-
         return result
 
     def _process_vetoes(self, results, veto_info):
@@ -1671,7 +1670,6 @@ def _process_vetoes(self, results, veto_info):
         dof = numpy.array(numpy.zeros(len(veto_info)), numpy.uint32, ndmin=1)
         sg_chisq = numpy.array(numpy.zeros(len(veto_info)), numpy.float32,
                                ndmin=1)
-
         results['chisq'] = chisq
         results['chisq_dof'] = dof
         results['sg_chisq'] = sg_chisq
diff --git a/pycbc/psd/variation.py b/pycbc/psd/variation.py
index e570d3288cd..77a6e40992c 100644
--- a/pycbc/psd/variation.py
+++ b/pycbc/psd/variation.py
@@ -297,18 +297,18 @@ def live_calc_psd_variation(strain,
     Calculate the psd variation in the PyCBC Live search.
 
     The Live strain data is convolved with the filter to produce a timeseries
-    containing the PSD variation values for each sample. This mean square of
-    the timeseries is then taken over the short_stride to remove the effects of
-    short duration glitches and further outliers from the mean are replaced
-    within the array. This array is then further averaged every second to
-    produce a timeseries that will contain a number of value equal to the
-    increment.
+    containing the PSD variation values for each sample. The mean square of
+    the timeseries is calculated over the short_stride to find outliers caused
+    by short duration glitches. Outliers are replaced with the average of
+    adjacent elements in the array. This array is then further averaged every
+    second to produce a timeseries that will contain a number of values equal
+    to the increment.
 
     Parameters
     ----------
     strain : pycbc.timeseries
         Live data being searched through by the PyCBC Live search.
-    full_filt : some sort of array, scipy so i guess numpy?
+    full_filt : numpy.ndarray
         A filter created by `live_create_filter`.
     increment : float
         The number of seconds in each increment in the PyCBC Live search.
@@ -329,12 +329,11 @@ def live_calc_psd_variation(strain,
     """
     sample_rate = int(strain.sample_rate)
 
-    # Grab the last increments worth of data with extra to account for the need
-    # to trim the data to remove edge effects.
-    astrain = strain.time_slice(strain.end_time - increment - (data_trim*3),
+    # Grab the last increment's worth of data, plus padding for edge effects.
+    astrain = strain.time_slice(strain.end_time - increment - (data_trim * 3),
                                 strain.end_time)
 
-    # Convole the data and the filter to produce the PSD variation timeseries,
+    # Convolve the data and the filter to produce the PSD variation timeseries,
     # then trim the beginning and end of the data to prevent edge effects.
     wstrain = sig.fftconvolve(astrain, full_filt, mode='same')
     wstrain = wstrain[int(data_trim * sample_rate):-int(data_trim * sample_rate)]
@@ -344,26 +343,25 @@ def live_calc_psd_variation(strain,
     short_ms = numpy.mean(
         wstrain.reshape(-1, int(sample_rate * short_stride)) ** 2, axis=1)
 
-    # Create an array of averages to substitute out outliers in the PSD
-    # variation array
+    # Define an array of adjacent-element averages used to replace outliers
     ave = 0.5 * (short_ms[2:] + short_ms[:-2])
     outliers = short_ms[1:-1] > (2. * ave)
     short_ms[1:-1][outliers] = ave[outliers]
 
     # Calculate the average of the PSD variation array for every second
+    # short_ms contains the mean square of the PSD variation timeseries over
+    # each short_stride. To average these values over each second, compute
+    # samples_per_second (the number of short_stride samples in one second)
+    # and step through short_ms one second at a time.
     m_s = []
-    stride = 1 / short_stride
-    for idx in range(int(len(short_ms) / stride)):
-        start = int(stride * idx)
-        end = int(stride * (idx + 1))
+    samples_per_second = 1 / short_stride
+    for idx in range(int(len(short_ms) / samples_per_second)):
+        start = int(samples_per_second * idx)
+        end = int(samples_per_second * (idx + 1))
         m_s.append(numpy.mean(short_ms[start:end]))
-
-    # Convert m_s to a numpy array
     m_s = numpy.array(m_s, dtype=wstrain.dtype)
 
-    # Convert the m_s numpy array to a pycbc timeseries which now contains the
-    # psd variation value every second.
     psd_var = TimeSeries(m_s,
                          delta_t=1.0,
                          epoch=strain.end_time - increment - (data_trim * 2))
@@ -374,14 +372,12 @@ def live_calc_psd_variation(strain,
 
 def live_find_var_value(triggers,
                         psd_var_timeseries):
     """
-    Interpolate between PSD variation values to find the PSD variation value
-    associated with a specific trigger.
+    Extract the PSD variation values at trigger times by linear interpolation.
 
     Parameters
     ----------
     triggers : dict
-        A dictionary containing the trigger values to find the PSD variation
-        of.
+        Dictionary of triggers; GPS times are read from the 'end_time' key
     psd_var_timeseries : pycbc.timeseries
         A timeseries containing the PSD variation value for each second of
         the latest increment in PyCBC Live. Created by live_calc_psd_variation.
@@ -393,14 +389,13 @@ def live_find_var_value(triggers,
         triggers.
     """
-    # Find gps time of the trigger
-    trigger_times = triggers['end_time']
-
-    # Interpolate between values
+    # Create the interpolator; out-of-range times evaluate to fill_value
     interpolator = interp1d(psd_var_timeseries.sample_times.numpy(),
                             psd_var_timeseries.numpy(),
                             fill_value=1.0,
                             bounds_error=False)
+
+    # Evaluate at the trigger times
+    psd_var_vals = interpolator(triggers['end_time'])
 
     return psd_var_vals
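
A few reviewer notes on this patch, with small sketches for context. First, the GPS start time in run.sh: GPS time counts seconds since 1980-01-06 00:00:00 UTC, and the shell arithmetic above does not apply the GPS-UTC leap-second offset (18 s as of 2017), so it yields only approximate GPS time, which is fine for picking a replay window. A minimal Python sketch of the same conversion (the names are illustrative, not from the patch):

```python
# Sketch of the GPS start/end time arithmetic in run.sh.
# Like the shell version, this ignores the GPS-UTC leap-second
# offset, so the result is only approximately true GPS time.
import time

GPS_EPOCH_TIMESTAMP = 315964800  # Unix time of 1980-01-06 00:00:00 UTC

gps_start_time = int(time.time()) - GPS_EPOCH_TIMESTAMP
gps_end_time = gps_start_time + 512  # the example analyzes 512 s of data

print(gps_start_time, gps_end_time)
```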
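Second, the reworked per-second averaging in `live_calc_psd_variation`. The sketch below is a minimal self-contained illustration of the patch's outlier replacement and averaging logic; `short_ms` is synthetic stand-in data, and `short_stride` is assumed to divide one second evenly:

```python
# Illustration of the outlier replacement and per-second averaging
# in live_calc_psd_variation; short_ms here is synthetic stand-in data.
import numpy

short_stride = 0.25                   # seconds per mean-square sample
rng = numpy.random.default_rng(0)
short_ms = rng.uniform(1.0, 2.0, 32)  # stand-in mean-square values
short_ms[10] = 50.0                   # inject a glitch-like outlier

# Replace any sample that exceeds twice the average of its two neighbours
ave = 0.5 * (short_ms[2:] + short_ms[:-2])
outliers = short_ms[1:-1] > (2. * ave)
short_ms[1:-1][outliers] = ave[outliers]

# Average the cleaned mean-square values over each second
samples_per_second = int(1 / short_stride)  # assumes an even division
n_seconds = len(short_ms) // samples_per_second
m_s = short_ms[:n_seconds * samples_per_second]
m_s = m_s.reshape(n_seconds, samples_per_second).mean(axis=1)

print(m_s)  # one PSD variation value per second
```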
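Finally, how `live_find_var_value` evaluates the per-second PSD variation at trigger times. This sketch mirrors the patched logic but uses a plain numpy array in place of the pycbc `TimeSeries`; the epoch, values, and trigger times are made up for the example:

```python
# Illustration of live_find_var_value's interpolation at trigger times.
import numpy
from scipy.interpolate import interp1d

epoch = 1272790000.0  # made-up start of the per-second series
psd_var_values = numpy.array([1.0, 1.1, 0.9, 1.3, 1.0])
sample_times = epoch + numpy.arange(len(psd_var_values))

triggers = {'end_time': numpy.array([epoch + 0.5, epoch + 3.2, epoch + 9.0])}

# Times outside the sampled range evaluate to the nominal value of 1.0,
# matching fill_value=1.0, bounds_error=False in the patch
interpolator = interp1d(sample_times, psd_var_values,
                        fill_value=1.0, bounds_error=False)
psd_var_vals = interpolator(triggers['end_time'])

print(psd_var_vals)  # [1.05 1.24 1.  ]
```

The `fill_value=1.0` fallback means a trigger outside the latest increment is assigned the nominal PSD variation rather than raising an error.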