From 60b43d435f7ce5c660f33a6eae2b910e6ada586f Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Wed, 30 Nov 2022 19:49:01 -0500 Subject: [PATCH] [hailctl] Add hailctl batch submit (#12471) * [hailctl] Add hailctl batch submit * make name an option not an argument * lint' * use relative file paths for files * add basic test * use the current hailgenetics/hail image not the dockerhub one * get quiet mode working to fully test hailctl batch submit * fix * cleanup * submit sets HAIL_QUERY_BACKEND to batch * fix * fixes * lint * name the batch that the submitted job spawns --- build.yaml | 57 ++++++++++++++ docker/hailgenetics/hail/Dockerfile | 1 + hail/python/hailtop/aiotools/copy.py | 3 +- hail/python/hailtop/batch_client/aioclient.py | 2 +- hail/python/hailtop/hailctl/batch/cli.py | 12 ++- hail/python/hailtop/hailctl/batch/submit.py | 74 +++++++++++++++++++ hail/python/hailtop/hailctl/batch/wait.py | 12 ++- 7 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 hail/python/hailtop/hailctl/batch/submit.py diff --git a/build.yaml b/build.yaml index 8aefaf6728e..b04f721a0ac 100644 --- a/build.yaml +++ b/build.yaml @@ -2638,6 +2638,63 @@ steps: - merge_code - service_base_image - deploy_batch + - kind: runImage + name: test_hailctl_batch + image: + valueFrom: hailgenetics_hail_image.image + script: | + set -ex + + export HAIL_GENETICS_HAIL_IMAGE="{{ hailgenetics_hail_image.image }}" + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + + hailctl config set batch/billing_project test + hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }}/{{ token }}/hailctl-test + + mkdir -p foo + echo "bar" > foo/baz.txt + + cat >simple_hail.py < Transfer: diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index f8f5c28e077..d019908927e 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -444,7 +444,7 @@ async def wait(self, description += ': ' if progress is not None: return await self._wait(description, progress, disable_progress_bar, starting_job) - with BatchProgressBar() as progress2: + with BatchProgressBar(disable=disable_progress_bar) as progress2: return await self._wait(description, progress2, disable_progress_bar, starting_job) async def debug_info(self): diff --git a/hail/python/hailtop/hailctl/batch/cli.py b/hail/python/hailtop/hailctl/batch/cli.py index 629d92dc88b..e1ad2b9bf6b 100644 --- a/hail/python/hailtop/hailctl/batch/cli.py +++ b/hail/python/hailtop/hailctl/batch/cli.py @@ -10,6 +10,7 @@ from . import log from . import job from . import billing +from . import submit def parser(): @@ -39,6 +40,11 @@ def parser(): help='Delete a batch', description='Delete a batch' ) + submit_parser = subparsers.add_parser( + 'submit', + help='Submit a batch', + description='Submit a batch', + ) log_parser = subparsers.add_parser( 'log', help='Get log for a job', @@ -69,6 +75,9 @@ def parser(): delete_parser.set_defaults(module='delete') delete.init_parser(delete_parser) + submit_parser.set_defaults(module='submit') + submit.init_parser(submit_parser) + log_parser.set_defaults(module='log') log.init_parser(log_parser) @@ -93,7 +102,8 @@ def main(args): 'cancel': cancel, 'log': log, 'job': job, - 'wait': wait + 'wait': wait, + 'submit': submit, } args, pass_through_args = parser().parse_known_args(args=args) diff --git a/hail/python/hailtop/hailctl/batch/submit.py b/hail/python/hailtop/hailctl/batch/submit.py new file mode 100644 index 00000000000..40309d59c40 --- /dev/null +++ b/hail/python/hailtop/hailctl/batch/submit.py @@ -0,0 +1,74 @@ +import asyncio +import orjson +import os + +import hailtop.batch as hb +import hailtop.batch_client.client as bc +from hailtop import pip_version +from hailtop.aiotools.copy import copy_from_dict +from hailtop.config import get_remote_tmpdir, get_user_config_path, get_deploy_config +from hailtop.utils import secret_alnum_string, unpack_comma_delimited_inputs + +HAIL_GENETICS_HAIL_IMAGE = os.environ.get('HAIL_GENETICS_HAIL_IMAGE', f'hailgenetics/hail:{pip_version()}') + + +def init_parser(parser): + parser.add_argument('script', type=str, help='Path to script') + parser.add_argument('--name', type=str, default='', help='Batch name') + parser.add_argument('--image-name', type=str, required=False, + help='Name for Docker image. Defaults to hailgenetics/hail') + parser.add_argument('--files', nargs='+', action='append', default=[], + help='Comma-separated list of files or directories to add to the working directory of job') + parser.add_argument('-o', type=str, default='text', choices=['text', 'json']) + + +async def async_main(args): + script = args.script + files = unpack_comma_delimited_inputs(args.files) + user_config = get_user_config_path() + quiet = args.o != 'text' + + remote_tmpdir = get_remote_tmpdir('hailctl batch submit') + tmpdir_path_prefix = secret_alnum_string() + + def cloud_prefix(path): + return f'{remote_tmpdir}/{tmpdir_path_prefix}/{path}' + + b = hb.Batch(name=args.name, backend=hb.ServiceBackend()) + j = b.new_bash_job() + j.image(args.image_name or HAIL_GENETICS_HAIL_IMAGE) + + rel_file_paths = [os.path.relpath(file) for file in files] + local_files_to_cloud_files = [{'from': local, 'to': cloud_prefix(local)} for local in rel_file_paths] + await copy_from_dict(files=[ + {'from': script, 'to': cloud_prefix(script)}, + {'from': str(user_config), 'to': cloud_prefix(user_config)}, + *local_files_to_cloud_files, + ]) + for file in local_files_to_cloud_files: + local_file = file['from'] + cloud_file = file['to'] + in_file = b.read_input(cloud_file) + j.command(f'ln -s {in_file} {local_file}') + + script_file = b.read_input(cloud_prefix(script)) + config_file = b.read_input(cloud_prefix(user_config)) + j.command(f'mkdir -p $HOME/.config/hail && ln -s {config_file} $HOME/.config/hail/config.ini') + + j.env('HAIL_QUERY_BACKEND', 'batch') + + command = 'python3' if script.endswith('.py') else 'bash' + j.command(f'{command} {script_file}') + batch_handle: bc.Batch = b.run(wait=False, disable_progress_bar=quiet) # type: ignore + + if args.o == 'text': + deploy_config = get_deploy_config() + url = deploy_config.external_url('batch', f'/batches/{batch_handle.id}/jobs/1') + print(f'Submitted batch {batch_handle.id}, see {url}') + else: + assert args.o == 'json' + print(orjson.dumps({'id': batch_handle.id}).decode('utf-8')) + + +def main(args, pass_through_args, client): # pylint: disable=unused-argument + asyncio.run(async_main(args)) diff --git a/hail/python/hailtop/hailctl/batch/wait.py b/hail/python/hailtop/hailctl/batch/wait.py index 1999aa9213c..0aad2e3ab6a 100644 --- a/hail/python/hailtop/hailctl/batch/wait.py +++ b/hail/python/hailtop/hailctl/batch/wait.py @@ -1,9 +1,14 @@ +import json import sys from .batch_cli_utils import get_batch_if_exists def init_parser(parser): parser.add_argument('batch_id', type=int) + parser.add_argument("--quiet", "-q", + action="store_true", + help="Do not print a progress bar for the batch") + parser.add_argument('-o', type=str, default='text', choices=['text', 'json']) def main(args, pass_through_args, client): # pylint: disable=unused-argument @@ -13,4 +18,9 @@ def main(args, pass_through_args, client): # pylint: disable=unused-argument sys.exit(1) batch = maybe_batch - print(batch.wait()) + quiet = args.quiet or args.o != 'text' + out = batch.wait(disable_progress_bar=quiet) + if args.o == 'json': + print(json.dumps(out)) + else: + print(out)