Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EWMS Sidecar Pt-3 #112

Merged
merged 13 commits into from
Dec 20, 2023
3 changes: 2 additions & 1 deletion .github/workflows/wipac-cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on: [push]

env:
CI_TEST: 'yes'
CLIENTMANAGER_IMAGE_WITH_TAG: 'ghcr.io/wipacrepo/skydriver:latest'
THIS_IMAGE_WITH_TAG: 'ghcr.io/wipacrepo/skydriver:latest'
EWMS_PILOT_TASK_TIMEOUT: 999
SCAN_BACKLOG_RUNNER_SHORT_DELAY: 1
SCAN_BACKLOG_RUNNER_DELAY: 1
Expand Down Expand Up @@ -135,6 +135,7 @@ jobs:

docker run --network="host" --rm -i --name test \
--env LATEST_TAG=$LATEST_TAG \
--env THIS_IMAGE_WITH_TAG=$THIS_IMAGE_WITH_TAG \
$(env | grep '^SKYSCAN_' | awk '$0="--env "$0') \
$(env | grep '^EWMS_' | awk '$0="--env "$0') \
$(env | grep '^CLIENTMANAGER_' | awk '$0="--env "$0') \
Expand Down
10 changes: 5 additions & 5 deletions dependencies-from-Dockerfile.log
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# pip freeze
########################################################################
backoff==2.2.1
boto3==1.34.3
botocore==1.34.3
boto3==1.34.4
botocore==1.34.4
cachetools==5.3.2
certifi==2023.11.17
cffi==1.16.0
Expand Down Expand Up @@ -75,15 +75,15 @@ pip==23.2.1
pipdeptree==2.13.1
setuptools==65.5.1
skydriver-clientmanager-ewms-sidecar
├── boto3 [required: Any, installed: 1.34.3]
│ ├── botocore [required: >=1.34.3,<1.35.0, installed: 1.34.3]
├── boto3 [required: Any, installed: 1.34.4]
│ ├── botocore [required: >=1.34.4,<1.35.0, installed: 1.34.4]
│ │ ├── jmespath [required: >=0.7.1,<2.0.0, installed: 1.0.1]
│ │ ├── python-dateutil [required: >=2.1,<3.0.0, installed: 2.8.2]
│ │ │ └── six [required: >=1.5, installed: 1.16.0]
│ │ └── urllib3 [required: >=1.25.4,<2.1, installed: 1.26.18]
│ ├── jmespath [required: >=0.7.1,<2.0.0, installed: 1.0.1]
│ └── s3transfer [required: >=0.9.0,<0.10.0, installed: 0.9.0]
│ └── botocore [required: >=1.33.2,<2.0a.0, installed: 1.34.3]
│ └── botocore [required: >=1.33.2,<2.0a.0, installed: 1.34.4]
│ ├── jmespath [required: >=0.7.1,<2.0.0, installed: 1.0.1]
│ ├── python-dateutil [required: >=2.1,<3.0.0, installed: 2.8.2]
│ │ └── six [required: >=1.5, installed: 1.16.0]
Expand Down
6 changes: 3 additions & 3 deletions ewms_sidecar/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Entry-point to start up clientmanager service."""
"""Entry-point to start up EWMS Sidecar."""

from . import clientmanager, config
from . import config, ewms_sidecar

if __name__ == "__main__":
clientmanager.main()
ewms_sidecar.main()
config.LOGGER.info("Done.")
125 changes: 57 additions & 68 deletions ewms_sidecar/condor/act.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from .. import utils
from ..config import ENV, LOGGER
from . import condor_tools, starter, stopper, watcher
from . import condor_tools, starter, watcher


def act(args: argparse.Namespace) -> None:
Expand All @@ -26,70 +26,59 @@ def act(args: argparse.Namespace) -> None:


def _act(args: argparse.Namespace, schedd_obj: htcondor.Schedd) -> None:
match args.action:
case "start":
LOGGER.info(
f"Starting {args.n_workers} Skymap Scanner client workers on {args.collector} / {args.schedd}"
)
# make connections -- do now so we don't have any surprises downstream
skydriver_rc = utils.connect_to_skydriver()
# start
submit_dict = starter.prep(
spool=args.spool,
# starter CL args -- worker
worker_memory_bytes=args.worker_memory_bytes,
worker_disk_bytes=args.worker_disk_bytes,
n_cores=args.n_cores,
max_worker_runtime=args.max_worker_runtime,
priority=args.priority,
# starter CL args -- client
client_args=args.client_args,
client_startup_json_s3=utils.s3ify(args.client_startup_json),
image=args.image,
)
# final checks
if args.dryrun:
LOGGER.critical("Script Aborted: dryrun enabled")
return
if utils.skydriver_aborted_scan(skydriver_rc):
LOGGER.critical("Script Aborted: SkyDriver aborted scan")
return
# start
submit_result_obj = starter.start(
schedd_obj=schedd_obj,
n_workers=args.n_workers,
submit_dict=submit_dict,
spool=args.spool,
)
# report to SkyDriver
skydriver_cluster_obj = dict(
orchestrator="condor",
location={
"collector": args.collector,
"schedd": args.schedd,
},
uuid=args.uuid,
cluster_id=submit_result_obj.cluster(),
n_workers=submit_result_obj.num_procs(),
starter_info=submit_dict,
)
utils.update_skydriver(skydriver_rc, **skydriver_cluster_obj)
LOGGER.info("Sent cluster info to SkyDriver")
watcher.watch(
args.collector,
args.schedd,
submit_result_obj.cluster(),
schedd_obj,
submit_result_obj.num_procs(),
skydriver_rc,
skydriver_cluster_obj,
)
case "stop":
stopper.stop(
args.collector,
args.schedd,
args.cluster_id,
schedd_obj,
)
case _:
raise RuntimeError(f"Unknown action: {args.action}")
LOGGER.info(
f"Starting {args.n_workers} Skymap Scanner client workers on {args.collector} / {args.schedd}"
)
# make connections -- do now so we don't have any surprises downstream
skydriver_rc = utils.connect_to_skydriver()
# start
submit_dict = starter.prep(
spool=args.spool,
# starter CL args -- worker
worker_memory_bytes=args.worker_memory_bytes,
worker_disk_bytes=args.worker_disk_bytes,
n_cores=args.n_cores,
max_worker_runtime=args.max_worker_runtime,
priority=args.priority,
# starter CL args -- client
client_args=args.client_args,
client_startup_json_s3=utils.s3ify(args.client_startup_json),
image=args.image,
)
# final checks
if args.dryrun:
LOGGER.critical("Script Aborted: dryrun enabled")
return
if utils.skydriver_aborted_scan(skydriver_rc):
LOGGER.critical("Script Aborted: SkyDriver aborted scan")
return
# start
submit_result_obj = starter.start(
schedd_obj=schedd_obj,
n_workers=args.n_workers,
submit_dict=submit_dict,
spool=args.spool,
)
# report to SkyDriver
skydriver_cluster_obj = dict(
orchestrator="condor",
location={
"collector": args.collector,
"schedd": args.schedd,
},
uuid=args.uuid,
cluster_id=submit_result_obj.cluster(),
n_workers=submit_result_obj.num_procs(),
starter_info=submit_dict,
)
utils.update_skydriver(skydriver_rc, **skydriver_cluster_obj)
LOGGER.info("Sent cluster info to SkyDriver")
watcher.watch(
args.collector,
args.schedd,
submit_result_obj.cluster(),
schedd_obj,
submit_result_obj.num_procs(),
skydriver_rc,
skydriver_cluster_obj,
)
Loading