Add search workflow run with condor + pegasus to GitHub Actions #3554

Merged · 3 commits · Dec 10, 2020
61 changes: 61 additions & 0 deletions .github/workflows/search-workflow.yml
@@ -0,0 +1,61 @@
name: run small search using pegasus + condor

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v1
    - name: Set up Python
      uses: actions/setup-python@v1
      with:
        python-version: 3.8
    - name: install condor
      run: |
        wget -qO - https://research.cs.wisc.edu/htcondor/ubuntu/HTCondor-Release.gpg.key | sudo apt-key add -
        echo "deb http://research.cs.wisc.edu/htcondor/ubuntu/8.9/focal focal contrib" | sudo tee -a /etc/apt/sources.list
        echo "deb-src http://research.cs.wisc.edu/htcondor/ubuntu/8.9/focal focal contrib" | sudo tee -a /etc/apt/sources.list
        sudo apt-get update
        sudo apt-get install minihtcondor
        sudo systemctl start condor
        sudo systemctl enable condor
    - name: install pegasus
      run: |
        wget https://download.pegasus.isi.edu/pegasus/ubuntu/dists/bionic/main/binary-amd64/pegasus_4.9.3-1+ubuntu18_amd64.deb
        sudo apt install ./pegasus_4.9.3-1+ubuntu18_amd64.deb
    - run: sudo apt-get install *fftw3* intel-mkl*
    - name: Install pycbc
      run: |
        python -m pip install --upgrade pip setuptools
        pip install -r requirements.txt
        pip install .
    - name: retrieving frame data
      run: bash -e examples/search/get.sh
    - name: generating template bank
      run: bash -e examples/search/bank.sh
    - name: generating statistic files
      run: bash -e examples/search/stats.sh
    - name: generating workflow
      run: |
        cp examples/search/*.ini ./
        bash -e examples/search/gen.sh
        cp *.gwf output/
    - name: running workflow
      run: |
        condor_status
        cd output
        bash -e ../examples/search/submit.sh
        ./status
        python ../examples/search/check_job.py
    - name: store log files
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: logs
        path: output/submitdir/work/**/*.out.001
    - name: store result page
      uses: actions/upload-artifact@v2
      with:
        name: results
        path: html
11 changes: 5 additions & 6 deletions bin/all_sky_search/pycbc_add_statmap
@@ -96,12 +96,11 @@ for fi in files:

logging.info('Combining foreground segments')

-# Convert segmentlistdict to a list ('seglists') of segmentlists
-# then np.sum(seglists, axis=0) does seglists[0] + seglists[1] + ...
-if len(indiv_segs) > 1:
-    foreground_segs = np.sum(list(indiv_segs.values()), axis=0)
-else:
-    foreground_segs = indiv_segs.values()[0]
Contributor: @GarethDaviesGW Can you check this? I think this is equivalent, but I don't understand why it was so complicated previously.

Contributor: I'm not sure why this was over-complicated in this way; I don't see how this wouldn't work.

Contributor: And the proposed solution looks correct as well. (This was probably early on in my understanding of segmentlists.)

@ahnitz (Member Author), Dec 10, 2020: The problem is that taking the length is actually wrong, as it is a dict. It was meant to avoid the case where there is a single segment list, since there the numpy sum returns a numpy ndarray instead of a segmentlist.

+# combine the segment list from each ifo
+foreground_segs = segments.segmentlist([])
+for segs in indiv_segs.values():
+    foreground_segs += segs

f.attrs['foreground_time'] = abs(foreground_segs)

# Output the segments which are in *any* type of coincidence
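For readers following the thread above, a minimal sketch of the pitfall (assuming the ligo.segments package that provides segmentlist; the import path is an assumption, older code used glue.segments):

    import numpy as np
    from ligo import segments  # assumed import; some releases use glue.segments

    # One detector only: the old np.sum branch silently changes the type.
    indiv_segs = segments.segmentlistdict()
    indiv_segs['H1'] = segments.segmentlist([segments.segment(0, 10)])

    summed = np.sum(list(indiv_segs.values()), axis=0)
    print(type(summed))          # <class 'numpy.ndarray'>, not a segmentlist

    # The old else-branch, indiv_segs.values()[0], also fails on Python 3,
    # because dict views are not subscriptable.

    # The loop adopted in this PR keeps the segmentlist type throughout:
    foreground_segs = segments.segmentlist([])
    for segs in indiv_segs.values():
        foreground_segs += segs
    print(type(foreground_segs), abs(foreground_segs))   # segmentlist, duration 10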
3 changes: 2 additions & 1 deletion bin/all_sky_search/pycbc_coinc_statmap
@@ -362,7 +362,8 @@ while numpy.any(louder_foreground == 0):
    indices_to_rm = []
    for ifo in args.ifos:
        indices_to_rm = numpy.concatenate([indices_to_rm, ind_to_rm[ifo]])
-    all_trigs = all_trigs.remove(indices_to_rm)
+
+    all_trigs = all_trigs.remove(indices_to_rm.astype(int))
    logging.info("We have %s triggers after hierarchical removal." % len(all_trigs.stat))

# Step 4: Re-cluster the triggers and calculate the inclusive ifar/fap
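A short sketch of why the astype(int) cast is needed here: indices_to_rm starts life as an empty Python list, so numpy.concatenate falls back to float64, and float arrays are rejected as indices (the values below are made up):

    import numpy as np

    ind_to_rm = {'H1': np.array([3, 5]), 'L1': np.array([7])}   # hypothetical indices
    indices_to_rm = []
    for ifo in ind_to_rm:
        indices_to_rm = np.concatenate([indices_to_rm, ind_to_rm[ifo]])

    print(indices_to_rm.dtype)    # float64: the empty list contributes no integer dtype
    data = np.arange(10)
    # data[indices_to_rm]         # IndexError: arrays used as indices must be integer
    print(data[indices_to_rm.astype(int)])    # [3 5 7]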
6 changes: 3 additions & 3 deletions bin/all_sky_search/pycbc_dtphase
@@ -220,15 +220,15 @@ for ifo0 in args.ifos:
    logging.info('smoothing done: %s', len(weights))

    logging.info('converting to numpy arrays and normalizing')
-    keys = np.array(weights.keys())
-    values = np.array(weights.values())
+    keys = np.array(list(weights.keys()))
+    values = np.array(list(weights.values()))
    values /= values.max()

    logging.info('Removing bins outside of SNR ratio limits')
    n_precut = len(keys)
    keep = None
    for i in range(len(args.ifos)-1):
-        srbin = np.array(zip(*keys)[i * 3 + 2])
+        srbin = np.array(list(zip(*keys))[i * 3 + 2])
        if keep is None:
            keep = (srbin <= srbmax) & (srbin >= srbmin)
        else:
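The three edits above are Python 3 ports: dict views and zip results are lazy in Python 3, so they must be wrapped in list() before numpy conversion or subscripting. The same pattern covers the list(p.keys()) changes in pycbc_brute_bank further down. A sketch with made-up values:

    import numpy as np

    weights = {(0, 0, 2): 0.5, (1, 0, 4): 1.0}   # hypothetical bin-tuple -> weight map

    print(np.array(weights.keys()))           # Python 3: a useless 0-d object array
    keys = np.array(list(weights.keys()))     # shape (2, 3), as the code expects

    # zip(*keys)[2] raises TypeError in Python 3 (zip objects are not
    # subscriptable); materializing the zip first restores the old behaviour.
    srbin = np.array(list(zip(*keys))[2])     # third entry of each key tuple
    print(srbin)                              # [2 4]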
88 changes: 46 additions & 42 deletions bin/bank/pycbc_brute_bank
@@ -53,7 +53,7 @@ if args.fixed_params:

class Shrinker(object):
def __init__(self, data):
self.data = data
self.data = data

def pop(self):
if len(self.data) == 0:
@@ -72,7 +72,7 @@ class TriangleBank(object):

def __len__(self):
return len(self.waveforms)

def activelen(self):
i = 0
for w in self.waveforms:
@@ -97,7 +97,7 @@ class TriangleBank(object):

def key(self, k):
return numpy.array([p.params[k] for p in self.waveforms])

def sigma_match_bound(self, sig):
if not hasattr(self, 'sigma'):
self.sigma = None
@@ -111,10 +111,10 @@ class TriangleBank(object):
        if self.r is None or len(self.r) != len(self):
            self.r = numpy.arange(0, len(self))
        return self.r

    def culltau0(self, threshold):
        cull = numpy.where(self.tau0() < threshold)[0]

        class dumb(object):
            pass
        for c in cull:
@@ -123,7 +123,7 @@ class TriangleBank(object):
            d.params = self.waveforms[c].params
            d.s = self.waveforms[c].s
            self.waveforms[c] = d

    def tau0(self):
        if not hasattr(self, 't0'):
@@ -132,12 +132,12 @@ class TriangleBank(object):
            self.t0 = numpy.array([h.tau0 for h in self])
        return self.t0

    def __contains__(self, hp):
        mmax = 0
        mnum = 0
        # Apply the sigma maximal match bound.
        if args.enable_sigma_bound:
            matches = self.sigma_match_bound(hp.s)
            r = self.range()[matches > hp.threshold]
        else:
            matches = numpy.ones(len(self))
@@ -148,7 +148,7 @@ class TriangleBank(object):
        # Apply tau0 threshold
        if args.tau0_threshold:
            hp.tau0 = pycbc.conversions.tau0_from_mass1_mass2(
                hp.params['mass1'],
                hp.params['mass2'], 15.0)
            hp.tbin = int(hp.tau0 / args.tau0_threshold)
@@ -160,7 +160,7 @@ class TriangleBank(object):
            mtau = len(r)

        # Try to do some actual matches
        inc = Shrinker(r*1)
        while 1:
            j = inc.pop()
            if j is None:
@@ -175,7 +175,7 @@ class TriangleBank(object):
            m = hp.gen.match(hp, hc)
            matches[j] = m
            mnum += 1

            # Update bounding match values, apply triangle inequality
            maxmatches = hc.matches - m + 1.10
            update = numpy.where(maxmatches < matches[hc.indices])[0]
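An aside on the triangle inequality comment above: treating the mismatch (1 - match) as approximately a metric bounds the match of the proposal hp against every template whose match with hc is already stored, and the 1.10 rather than 1.0 adds slack because match is not an exact metric. Illustrative numbers (all values hypothetical):

    # Upper bound: match(hp, other) <= match(hc, other) - match(hp, hc) + 1
    m_hp_hc = 0.70           # freshly computed match of proposal hp against hc
    m_hc_other = 0.72        # stored match of hc against some other template
    bound = m_hc_other - m_hp_hc + 1.10    # 1.12, including the 0.10 safety margin
    print(bound)
    # Templates whose current bound exceeds this are tightened to it, and
    # templates bounded below the skip threshold are never matched directly.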
@@ -184,11 +184,11 @@ class TriangleBank(object):
            # Update where to calculate matches
            skip_threshold = 1 - (1 - hp.threshold) * 2.0
            inc.data = inc.data[matches[inc.data] > skip_threshold]

            if m > hp.threshold:
                return True
            if m > mmax:
                mmax = m

    def check_params(self, gen, params, threshold):
        num_tried = 0
@@ -201,7 +201,7 @@ class TriangleBank(object):
            except Exception as err:
                print(err)
                continue

            hp.gen = gen
            hp.threshold = threshold
            if hp not in self:
@@ -215,7 +215,7 @@ class GenUniformWaveform(object):
        self.f_lower = f_lower
        self.delta_f = 1.0 / buffer_length
        tlen = int(buffer_length * sample_rate)
-        self.flen = tlen / 2 + 1
+        self.flen = tlen // 2 + 1
        psd = pycbc.psd.from_cli(args, self.flen, self.delta_f, self.f_lower)
        self.kmin = int(f_lower * buffer_length)
        self.w = ((1.0 / psd[self.kmin:-1]) ** 0.5).astype(numpy.float32)
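A quick note on the flen change above, since it is easy to miss: under Python 3, / between two ints returns a float, which cannot serve as an array length, hence the switch to floor division.

    tlen = 4096
    print(tlen / 2 + 1)     # 2049.0, a float on Python 3 (was an int on Python 2)
    print(tlen // 2 + 1)    # 2049, an int, valid as a frequency-series length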
@@ -229,16 +229,16 @@ class GenUniformWaveform(object):
    def generate(self, **kwds):
        kwds.update(fdict)
        if kwds['approximant'] in pycbc.waveform.fd_approximants():
            hp, hc = pycbc.waveform.get_fd_waveform(delta_f=self.delta_f,
                                                    f_lower=self.f_lower, **kwds)
            if 'fratio' in kwds:
                hp = hc * kwds['fratio'] + hp * (1 - kwds['fratio'])
        else:
            dt = 1.0 / args.sample_rate
            hp = pycbc.waveform.get_waveform_filter(
                pycbc.types.zeros(self.flen, dtype=numpy.complex64),
                delta_f=self.delta_f, delta_t=dt,
                f_lower=self.f_lower, **kwds)

        hp.resize(self.flen)
        hp = hp.astype(numpy.complex64)
@@ -264,7 +264,7 @@ else:

size = int(1.0 / tolerance)

gen = GenUniformWaveform(args.buffer_length,
                         args.sample_rate, args.low_frequency_cutoff)
bank = TriangleBank()

@@ -276,7 +276,7 @@ if args.input_file:

def draw(rtype):
    params = {}

    if rtype == 'uniform':
        for name, pmin, pmax in zip(args.params, args.min, args.max):
            params[name] = numpy.random.uniform(pmin, pmax, size=size)
@@ -293,9 +293,9 @@ def draw(rtype):
        points = kde.resample(size=size)
        for k, v in zip(p, points):
            params[k] = v

    params['approximant'] = numpy.array([args.approximant]*size)

    # Filter out stuff
    l = None
    for name, pmin, pmax in zip(args.params, args.min, args.max):
@@ -318,42 +318,42 @@ def draw(rtype):
    from pycbc.conversions import mchirp_from_mass1_mass2
    mc = mchirp_from_mass1_mass2(params['mass1'], params['mass2'])
    l &= mc > args.min_mchirp

    for k in params:
        params[k] = params[k][l]

    return params

def cdraw(rtype, ts, te):
    from pycbc.conversions import tau0_from_mass1_mass2

    p = draw(rtype)
-    if len(p[p.keys()[0]]) > 0:
+    if len(p[list(p.keys())[0]]) > 0:
        t = tau0_from_mass1_mass2(p['mass1'], p['mass2'], 15.0)
        l = (t < te) & (t > ts)
        for k in p:
            p[k] = p[k][l]

    i = 0
-    while len(p[p.keys()[0]]) < size:
+    while len(p[list(p.keys())[0]]) < size:
        tp = draw(rtype)
        for k in p:
            p[k] = numpy.concatenate([p[k], tp[k]])

-        if len(p[p.keys()[0]]) > 0:
+        if len(p[list(p.keys())[0]]) > 0:
            t = tau0_from_mass1_mass2(p['mass1'], p['mass2'], 15.0)
            l = (t < te) & (t > ts)
            for k in p:
                p[k] = p[k][l]

        i += 1
        if i > 1000:
            break

-    if len(p[p.keys()[0]]) == 0:
+    if len(p[list(p.keys())[0]]) == 0:
        return None

    return p

tau0s = args.tau0_start
@@ -372,7 +372,7 @@ while tau0s < args.tau0_end:
    if len(bank) > 0:
        go = False
        break

    blen = len(bank)
    bank, uconv = bank.check_params(gen, params, args.minimal_match)
    logging.info("%s: Round (U): %s Size: %s conv: %s added: %s",
@@ -393,17 +393,17 @@ while tau0s < args.tau0_end:

if kloop == 1:
okconv = kconv

if kconv <= tolerance:
conv = kconv
break

bank.culltau0(tau0s - args.tau0_threshold * 2.0)
logging.info("Region Done %3.1f-%3.1f, %s stored", tau0s, tau0e, bank.activelen())
region += 1
region += 1
tau0s += args.tau0_crawl / 2
tau0e += args.tau0_crawl / 2

o = h5py.File(args.output_file, 'w')
for k in bank.keys():
-    o[k] = bank.key(k)
+    val = bank.key(k)
+    if val.dtype.char == 'U':
+        val = val.astype('bytes')
+    o[k] = val
+o['f_lower'] = numpy.ones(len(val)) * args.low_frequency_cutoff
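Finally, a sketch of the h5py behaviour the new 'U'-dtype branch guards against: h5py (at least the 2.x releases of this era) has no conversion path for numpy's fixed-width unicode dtype, so string columns such as the approximant must be cast to bytes before writing. File and dataset names below are illustrative:

    import numpy as np
    import h5py

    approximants = np.array(['IMRPhenomD', 'TaylorF2'])     # dtype('<U10'), dtype.char == 'U'
    with h5py.File('bank.hdf', 'w') as o:
        # o['approximant'] = approximants                   # TypeError: no conversion path for U dtype
        o['approximant'] = approximants.astype('bytes')     # fixed-length byte strings, dtype('S10')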