Skip to content

Commit

Permalink
Merge pull request #8 from beckermr/exit
Browse files Browse the repository at this point in the history
TST add a live test
  • Loading branch information
beckermr authored Jan 26, 2022
2 parents 8e5d863 + 9ee0c03 commit c0c2a96
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@ docs/_build/

# PyBuilder
target/

scripts/conda-exec/
23 changes: 15 additions & 8 deletions mattspy/condor_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ def _attempt_result(exec, nanny_id, cjob, subids, status_code, debug):
if subid is not None and status_code in ["4", "3", "5", "7", "9"]:
outfile = os.path.join(exec.execdir, subid, "output.pkl")
infile = os.path.join(exec.execdir, subid, "input.pkl")
condorfile = os.path.join(exec.execdir, subid, "condor.sub")
logfile = os.path.join(exec.execdir, subid, "log.oe")

del ALL_CONDOR_JOBS[cjob]
if not debug:
Expand Down Expand Up @@ -258,10 +260,11 @@ def _attempt_result(exec, nanny_id, cjob, subids, status_code, debug):
res = RuntimeError(
"Condor job %s: no status or job output found!" % subid)

subprocess.run(
"rm -f %s %s" % (infile, outfile),
shell=True,
)
if not debug:
subprocess.run(
"rm -f %s %s %s %s" % (infile, outfile, condorfile, logfile),
shell=True,
)

fut = exec._nanny_subids[nanny_id][subid][1]
if isinstance(res, Exception):
Expand Down Expand Up @@ -354,14 +357,13 @@ class BNLCondorExecutor():
The conda environment to activate before running code.
max_workers : int, optional
The maximum number of condor jobs. Default is 10000.
verbose : int, optional
The maximum verbosity. If greater than zero, information can be printed.
Default is 0.
debug : bool, optional
If True, the completed condor jobs are left in the queue. This can be
useful to diagnose failures for jobs in the "held" state.
mem : int, optional
Requested memory in GB. Default is 2.
verbose : int, optional
This is ignored but is kept for backward compatibility. Use `debug=True`.
"""
def __init__(
self, conda_env, max_workers=10000,
Expand All @@ -384,7 +386,7 @@ def __init__(

def __enter__(self):
os.makedirs(self.execdir, exist_ok=True)
if self.verbose > 0:
if self.debug:
print(
"starting condor executor: "
"exec dir %s - max workers %s" % (
Expand Down Expand Up @@ -423,6 +425,11 @@ def __exit__(self, exc_type, exc_value, traceback):
self._done = True
self._exec.shutdown()
self._exec = None
if not self.debug:
subprocess.run(
f"rm -rf {self.execdir}",
shell=True,
)

def submit(self, func, *args, **kwargs):
subid = uuid.uuid4().hex
Expand Down
34 changes: 34 additions & 0 deletions scripts/run_condor_exec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import sys
import time

from concurrent.futures import as_completed
from esutil.pbar import PBar
from mattspy import BNLCondorExecutor


def fun(n, sleep_time=120):
    """Sleep for a while and then return the input unchanged.

    Parameters
    ----------
    n : object
        The value to hand back to the caller.
    sleep_time : float, optional
        The number of seconds to sleep before returning. Default is 120,
        which matches the previously hard-coded duration.

    Returns
    -------
    n : object
        The input value, unchanged.
    """
    time.sleep(sleep_time)
    return n


def main():
    """Run a live test of `BNLCondorExecutor` from the command line.

    The number of jobs is read from the first command-line argument. That
    many calls to `fun` are submitted, the results are summed as the futures
    complete, and the total is checked against `sum(range(n_jobs))`.
    Exceptions raised by individual futures are printed rather than
    propagated, so a failed job shows up as a failure of the final check.

    Raises
    ------
    SystemExit
        If the number of jobs was not given on the command line.
    AssertionError
        If the summed results do not equal `sum(range(n_jobs))`.
    """
    if len(sys.argv) < 2:
        # a usage message is more helpful than a bare IndexError
        sys.exit(f"usage: {sys.argv[0]} N_JOBS")
    n_jobs = int(sys.argv[1])

    # the executor is bound to `executor` so the builtin `exec` is not shadowed
    with BNLCondorExecutor("bnl", debug=True) as executor:
        futs = [
            executor.submit(fun, i)
            for i in range(n_jobs)
        ]

        # results are collected inside the `with` block, before the
        # executor's __exit__ runs
        tot = 0
        for fut in PBar(as_completed(futs), total=len(futs), desc="running jobs"):
            try:
                tot += fut.result()
            except Exception as e:
                print(f"failure: {repr(e)}", flush=True)

    assert tot == sum(range(n_jobs))


# only run the live test when executed as a script, not when imported
if __name__ == "__main__":
    main()

0 comments on commit c0c2a96

Please sign in to comment.