From 31a088979f7356bb13bb7c2e3b53d7501431c57b Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Fri, 23 Jun 2023 15:58:32 -0400 Subject: [PATCH] No worker deploy config secret (#13211) A revamp of #13203, I've made no changes to the scala code so tests passing on this should mean we maintain compatibility with currently released JARs. --- .../cloud/azure/driver/create_instance.py | 7 ++++++ .../batch/cloud/gcp/driver/create_instance.py | 8 +++++++ batch/batch/front_end/front_end.py | 8 ------- batch/batch/globals.py | 2 +- batch/batch/worker/worker.py | 23 ++++++++++++++++--- batch/test/test_batch.py | 19 ++++++++------- hail/python/hailtop/config/deploy_config.py | 13 ++++++++++- 7 files changed, 57 insertions(+), 23 deletions(-) diff --git a/batch/batch/cloud/azure/driver/create_instance.py b/batch/batch/cloud/azure/driver/create_instance.py index d9b75157c33..70444c02168 100644 --- a/batch/batch/cloud/azure/driver/create_instance.py +++ b/batch/batch/cloud/azure/driver/create_instance.py @@ -6,6 +6,7 @@ from typing import Any, Dict, List, Optional from gear.cloud_config import get_global_config +from hailtop.config import get_deploy_config from ....batch_configuration import DEFAULT_NAMESPACE, DOCKER_PREFIX, DOCKER_ROOT_IMAGE, INTERNAL_GATEWAY_IP from ....file_store import FileStore @@ -230,6 +231,11 @@ def create_vm_config( {make_global_config_str} +mkdir /deploy-config +cat >/deploy-config/deploy-config.json < dict: {make_global_config_str} +mkdir /deploy-config +cat >/deploy-config/deploy-config.json < dict: -v /batch:/batch:shared \ -v /logs:/logs \ -v /global-config:/global-config \ +-v /deploy-config:/deploy-config \ -v /cloudfuse:/cloudfuse:shared \ -v /etc/netns:/etc/netns \ -v /sys/fs/cgroup:/sys/fs/cgroup \ diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index cdefef22c71..e0726c1f60d 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -1033,14 +1033,6 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, update_id 'mount_in_copy': False, } ) - secrets.append( - { - 'namespace': DEFAULT_NAMESPACE, - 'name': 'worker-deploy-config', - 'mount_path': '/deploy-config', - 'mount_in_copy': False, - } - ) secrets.append( { 'namespace': DEFAULT_NAMESPACE, diff --git a/batch/batch/globals.py b/batch/batch/globals.py index cbf6e3326da..c1a52e833cd 100644 --- a/batch/batch/globals.py +++ b/batch/batch/globals.py @@ -21,7 +21,7 @@ BATCH_FORMAT_VERSION = 7 STATUS_FORMAT_VERSION = 5 -INSTANCE_VERSION = 24 +INSTANCE_VERSION = 25 MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024 RESERVED_STORAGE_GB_PER_CORE = 5 diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 4bca51e9968..0b9848723ab 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -48,7 +48,7 @@ from hailtop.aiotools import AsyncFS, LocalAsyncFS from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES -from hailtop.config import DeployConfig +from hailtop.config import get_deploy_config from hailtop.hail_logging import AccessLogger, configure_logging from hailtop.utils import ( CalledProcessError, @@ -199,7 +199,7 @@ def compose(auth: Union[MutableMapping, str, bytes], registry_addr: Optional[str N_SLOTS = 4 * CORES # Jobs are allowed at minimum a quarter core -deploy_config = DeployConfig('gce', NAMESPACE, {}) +deploy_config = get_deploy_config() docker: Optional[aiodocker.Docker] = None @@ -1246,7 +1246,7 @@ def _mounts(self, uid: int, gid: int) -> List[MountSpecification]: } ) - return ( + mounts = ( self.volume_mounts + external_volumes + [ @@ -1309,6 +1309,18 @@ def _mounts(self, uid: int, gid: int) -> List[MountSpecification]: ] ) + if not any(v['destination'] == '/deploy-config' for v in self.volume_mounts): + mounts.append( + { + 'source': '/deploy-config/deploy-config.json', + 'destination': '/deploy-config/deploy-config.json', + 'type': 'none', + 'options': ['bind', 'ro', 'private'], + }, + ) + + return mounts + def _env(self): assert self.image.image_config env = self.image.image_config['Config']['Env'] + self.env @@ -2085,6 +2097,11 @@ def write_batch_config(self): os.makedirs(f'{self.scratch}/batch-config') with open(f'{self.scratch}/batch-config/batch-config.json', 'wb') as config: config.write(orjson.dumps({'version': 1, 'batch_id': self.batch_id})) + # Necessary for backward compatibility for Hail Query jars that expect + # the deploy config at this path and not at `/deploy-config/deploy-config.json` + os.makedirs(f'{self.scratch}/secrets/deploy-config', exist_ok=True) + with open(f'{self.scratch}/secrets/deploy-config/deploy-config.json', 'wb') as config: + config.write(orjson.dumps(deploy_config.get_config())) def step(self, name): return self.timings.step(name) diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index 8519a7450db..ca47ae4dbdc 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -903,7 +903,7 @@ def test_cant_submit_to_default_with_other_ns_creds(client: BatchClient): '-c', f''' hailctl config set domain {DOMAIN} -rm /deploy-config/deploy-config.json +export HAIL_DEFAULT_NAMESPACE=default python3 -c \'{script}\'''', ], mount_tokens=True, @@ -916,27 +916,26 @@ def test_cant_submit_to_default_with_other_ns_creds(client: BatchClient): assert status['state'] == 'Failed', str((status, b.debug_info())) assert "Please log in" in j.log()['main'], (str(j.log()['main']), status) + +def test_deploy_config_is_mounted_as_readonly(client: BatchClient): bb = create_batch(client) j = bb.create_job( HAIL_GENETICS_HAILTOP_IMAGE, [ '/bin/bash', '-c', - f''' + ''' +set -ex jq '.default_namespace = "default"' /deploy-config/deploy-config.json > tmp.json -mv tmp.json /deploy-config/deploy-config.json -python3 -c \'{script}\'''', +mv tmp.json /deploy-config/deploy-config.json''', ], mount_tokens=True, ) b = bb.submit() status = j.wait() - if NAMESPACE == 'default': - assert status['state'] == 'Success', str((status, b.debug_info())) - else: - assert status['state'] == 'Failed', str((status, b.debug_info())) - job_log = j.log() - assert "Please log in" in job_log['main'], str((job_log, b.debug_info())) + assert status['state'] == 'Failed', str((status, b.debug_info())) + job_log = j.log() + assert "mv: cannot move" in job_log['main'], str((job_log, b.debug_info())) def test_cannot_contact_other_internal_ips(client: BatchClient): diff --git a/hail/python/hailtop/config/deploy_config.py b/hail/python/hailtop/config/deploy_config.py index 0c4202e9ece..f8da8534918 100644 --- a/hail/python/hailtop/config/deploy_config.py +++ b/hail/python/hailtop/config/deploy_config.py @@ -9,10 +9,18 @@ log = logging.getLogger('deploy_config') +def env_var_or_default(name: str, default: str) -> str: + return os.environ.get(f'HAIL_{name}') or default + + class DeployConfig: @staticmethod def from_config(config) -> 'DeployConfig': - return DeployConfig(config['location'], config['default_namespace'], config.get('domain') or 'hail.is') + return DeployConfig( + env_var_or_default('LOCATION', config['location']), + env_var_or_default('DEFAULT_NAMESPACE', config['default_namespace']), + env_var_or_default('DOMAIN', config.get('domain') or 'hail.is') + ) def get_config(self) -> Dict[str, str]: return { @@ -51,6 +59,9 @@ def __init__(self, location, default_namespace, domain): def with_default_namespace(self, default_namespace): return DeployConfig(self._location, default_namespace, self._domain) + def with_location(self, location): + return DeployConfig(location, self._default_namespace, self._domain) + def default_namespace(self): return self._default_namespace