From 4d0379b1da793d0c28e1673a00fafdf348f6c13d Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Tue, 5 Sep 2023 21:41:26 -0400 Subject: [PATCH 01/15] [batch] Use the test and test-dev GSAs in test_batch not CI (#13562) See relevant discussion [here](https://hail.zulipchat.com/#narrow/stream/127527-team/topic/CI.20Deploy.20Failure/near/389266274) --- batch/batch/globals.py | 2 +- batch/test/billing_projects.py | 2 +- batch/test/test_accounts.py | 5 +- build.yaml | 102 ++++++++++-------- hail/python/hailtop/auth/auth.py | 10 +- hail/python/hailtop/batch_client/aioclient.py | 6 +- 6 files changed, 72 insertions(+), 55 deletions(-) diff --git a/batch/batch/globals.py b/batch/batch/globals.py index c1a52e833cd..5014241eac1 100644 --- a/batch/batch/globals.py +++ b/batch/batch/globals.py @@ -21,7 +21,7 @@ BATCH_FORMAT_VERSION = 7 STATUS_FORMAT_VERSION = 5 -INSTANCE_VERSION = 25 +INSTANCE_VERSION = 26 MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024 RESERVED_STORAGE_GB_PER_CORE = 5 diff --git a/batch/test/billing_projects.py b/batch/test/billing_projects.py index ef4f1ad9f45..991fc596ff9 100644 --- a/batch/test/billing_projects.py +++ b/batch/test/billing_projects.py @@ -10,7 +10,7 @@ def get_billing_project_prefix(): async def delete_all_test_billing_projects(): billing_project_prefix = get_billing_project_prefix() - bc = await BatchClient.create('', token_file=os.environ['HAIL_TEST_DEV_TOKEN_FILE']) + bc = await BatchClient.create('', cloud_credentials_file=os.environ['HAIL_TEST_DEV_GSA_KEY_FILE']) try: for project in await bc.list_billing_projects(): if project['billing_project'].startswith(billing_project_prefix): diff --git a/batch/test/test_accounts.py b/batch/test/test_accounts.py index 5a2673ae3d2..941f2c64b93 100644 --- a/batch/test/test_accounts.py +++ b/batch/test/test_accounts.py @@ -24,7 +24,7 @@ async def make_client() -> AsyncGenerator[Callable[[str], Awaitable[BatchClient] _bcs = [] async def factory(project: str): - bc = await BatchClient.create(project, token_file=os.environ['HAIL_TEST_TOKEN_FILE']) + bc = await BatchClient.create(project, cloud_credentials_file=os.environ['HAIL_TEST_GSA_KEY_FILE']) _bcs.append(bc) return bc @@ -36,7 +36,8 @@ async def factory(project: str): @pytest.fixture async def dev_client() -> AsyncGenerator[BatchClient, Any]: bc = await BatchClient.create( - 'billing-project-not-needed-but-required-by-BatchClient', token_file=os.environ['HAIL_TEST_DEV_TOKEN_FILE'] + 'billing-project-not-needed-but-required-by-BatchClient', + cloud_credentials_file=os.environ['HAIL_TEST_DEV_GSA_KEY_FILE'], ) yield bc await bc.close() diff --git a/build.yaml b/build.yaml index 43a42538360..d909a3b3ac2 100644 --- a/build.yaml +++ b/build.yaml @@ -1088,6 +1088,7 @@ steps: export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export HAIL_TEST_S3_BUCKET=hail-test-dy5rg export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials @@ -1213,6 +1214,7 @@ steps: # The test should use the test credentials, not CI's credentials sed -i 's/gsa-key/test-gsa-key/g' ${SPARK_HOME}/conf/core-site.xml export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export HAIL_QUERY_N_CORES=2 export OMP_NUM_THREADS=2 @@ -1646,6 +1648,8 @@ steps: valueFrom: hailgenetics_hailtop_image.image script: | set -ex + export GOOGLE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} {% if default_ns.name == "default" %} @@ -1658,10 +1662,10 @@ steps: {{ code.username }} {{ code.login_id }} {% endif %} secrets: - - name: test-dev-tokens + - name: test-dev-gsa-key namespace: valueFrom: default_ns.name - mountPath: /user-tokens + mountPath: /test-dev-gsa-key dependsOn: - default_ns - hailgenetics_hailtop_image @@ -1747,6 +1751,8 @@ steps: valueFrom: hail_dev_image.image script: | set -ex + export GOOGLE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} python3 -m pytest \ @@ -1760,10 +1766,10 @@ steps: --timeout=120 \ /io/monitoring/test secrets: - - name: test-dev-tokens + - name: test-dev-gsa-key namespace: valueFrom: default_ns.name - mountPath: /user-tokens + mountPath: /test-dev-gsa-key timeout: 300 inputs: - from: /repo/monitoring/test @@ -1787,6 +1793,9 @@ steps: script: | set -ex + # Or else hailctl will try to authenticate as CI + unset HAIL_IDENTITY_PROVIDER_JSON + export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_DEPLOY_CONFIG_FILE=/deploy-config/deploy-config.json @@ -1846,6 +1855,9 @@ steps: script: | set -ex + # Or else hailctl will try to authenticate as CI + unset HAIL_IDENTITY_PROVIDER_JSON + export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_DEPLOY_CONFIG_FILE=/deploy-config/deploy-config.json @@ -2293,6 +2305,8 @@ steps: script: | set -ex + export GOOGLE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} {% for user in code.get("developers", []) %} {% if user['username'] != 'test-dev' %} @@ -2304,10 +2318,10 @@ steps: {% endif %} {% endfor %} secrets: - - name: test-dev-tokens + - name: test-dev-gsa-key namespace: valueFrom: default_ns.name - mountPath: /user-tokens + mountPath: /test-dev-gsa-key scopes: - dev - test @@ -2417,10 +2431,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens dependsOn: - default_ns - merge_code @@ -2501,10 +2511,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - name: auth-oauth2-client-secret namespace: valueFrom: default_ns.name @@ -2604,6 +2610,14 @@ steps: valueFrom: batch_image.image script: | set -ex + + # Use the test identity's credentials instead of CI's + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + + export HAIL_TEST_GSA_KEY_FILE=/test-gsa-key/key.json + export HAIL_TEST_DEV_GSA_KEY_FILE=/test-dev-gsa-key/key.json + export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export CI_UTILS_IMAGE={{ ci_utils_image.image }} export HAIL_CURL_IMAGE={{ curl_image.image }} @@ -2615,8 +2629,6 @@ steps: export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export HAIL_GENETICS_HAILTOP_IMAGE="{{ hailgenetics_hailtop_image.image }}" export HAIL_GENETICS_HAIL_IMAGE="{{ hailgenetics_hail_image.image }}" - export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json - export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" export HAIL_CLOUD="{{ global.cloud }}" export HAIL_DOMAIN="{{ global.domain }}" @@ -2645,18 +2657,14 @@ steps: port: 5000 timeout: 1500 secrets: - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - - name: test-dev-tokens - namespace: - valueFrom: default_ns.name - mountPath: /dev-tokens - name: test-gsa-key namespace: valueFrom: default_ns.name mountPath: /test-gsa-key + - name: test-dev-gsa-key + namespace: + valueFrom: default_ns.name + mountPath: /test-dev-gsa-key dependsOn: - create_deploy_config - create_accounts @@ -2682,7 +2690,9 @@ steps: set -ex export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json + export GOOGLE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export HAIL_TEST_DEV_GSA_KEY_FILE=/test-dev-gsa-key/key.json export HAIL_TOKEN="{{ test_batch.token }}" cd /io/test python3 -c ' @@ -2694,10 +2704,10 @@ steps: - from: /repo/batch/test to: /io/test secrets: - - name: test-dev-tokens + - name: test-dev-gsa-key namespace: valueFrom: default_ns.name - mountPath: /dev-tokens + mountPath: /test-dev-gsa-key alwaysRun: true dependsOn: - create_deploy_config @@ -2829,6 +2839,8 @@ steps: script: | set -ex export ORGANIZATION=hail-ci-test + export GOOGLE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export REPO_NAME=ci-test-"{{ create_ci_test_repo.token }}" export NAMESPACE="{{ default_ns.name }}" @@ -2842,10 +2854,10 @@ steps: --durations=50 \ /io/ci/test secrets: - - name: test-dev-tokens + - name: test-dev-gsa-key namespace: valueFrom: default_ns.name - mountPath: /user-tokens + mountPath: /test-dev-gsa-key timeout: 5400 inputs: - from: /repo/ci/test @@ -2876,6 +2888,7 @@ steps: export HAIL_CLOUD={{ global.cloud }} export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export HAIL_GENETICS_HAIL_IMAGE="{{ hailgenetics_hail_image.image }}" export HAIL_GENETICS_HAILTOP_IMAGE="{{ hailgenetics_hailtop_image.image }}" @@ -2900,10 +2913,6 @@ steps: to: /io/hailtop timeout: 1200 secrets: - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - name: test-gsa-key namespace: valueFrom: default_ns.name @@ -3050,6 +3059,7 @@ steps: set -ex export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json cd /io/hailtop/batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }}/test_batch_docs/{{ token }}/ @@ -3067,10 +3077,6 @@ steps: --timeout=120 \ --ignore=docs/conf.py secrets: - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - name: test-gsa-key namespace: valueFrom: default_ns.name @@ -3455,16 +3461,18 @@ steps: valueFrom: hailgenetics_hailtop_image.image script: | set -ex + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json hailctl curl {{ default_ns.name }} www / \ -vvv \ -fsSL \ --retry 3 \ --retry-delay 5 secrets: - - name: test-tokens + - name: test-gsa-key namespace: valueFrom: default_ns.name - mountPath: /user-tokens + mountPath: /test-gsa-key dependsOn: - default_ns - create_accounts @@ -3515,10 +3523,6 @@ steps: - from: /repo/hail/testng-fs.xml to: /io/testng-fs.xml secrets: - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - name: test-gsa-key namespace: valueFrom: default_ns.name @@ -3586,6 +3590,8 @@ steps: valueFrom: hailgenetics_hailtop_image.image script: | export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} + export GOOGLE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json cat >cancel_all_running_test_batches.py <<'EOF' from hailtop.batch_client.aioclient import BatchClient @@ -3607,10 +3613,10 @@ steps: python3 cancel_all_running_test_batches.py secrets: - - name: test-dev-tokens + - name: test-dev-gsa-key namespace: valueFrom: default_ns.name - mountPath: /user-tokens + mountPath: /test-dev-gsa-key alwaysRun: true timeout: 300 dependsOn: @@ -3632,6 +3638,8 @@ steps: image: valueFrom: hail_dev_image.image script: | + export GOOGLE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-dev-gsa-key/key.json export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export DOCKER_PREFIX="{{ global.docker_prefix }}" export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" @@ -3652,10 +3660,10 @@ steps: to: /io/test timeout: 300 secrets: - - name: test-dev-tokens + - name: test-dev-gsa-key namespace: valueFrom: default_ns.name - mountPath: /user-tokens + mountPath: /test-dev-gsa-key scopes: - test - dev diff --git a/hail/python/hailtop/auth/auth.py b/hail/python/hailtop/auth/auth.py index efd18937901..796bd9b206e 100644 --- a/hail/python/hailtop/auth/auth.py +++ b/hail/python/hailtop/auth/auth.py @@ -80,16 +80,17 @@ async def __aexit__(self, *_) -> None: def hail_credentials( *, tokens_file: Optional[str] = None, + cloud_credentials_file: Optional[str] = None, namespace: Optional[str] = None, authorize_target: bool = True ) -> HailCredentials: tokens = get_tokens(tokens_file) deploy_config = get_deploy_config() ns = namespace or deploy_config.default_namespace() - return HailCredentials(tokens, get_cloud_credentials_scoped_for_hail(), ns, authorize_target=authorize_target) + return HailCredentials(tokens, get_cloud_credentials_scoped_for_hail(credentials_file=cloud_credentials_file), ns, authorize_target=authorize_target) -def get_cloud_credentials_scoped_for_hail() -> Optional[CloudCredentials]: +def get_cloud_credentials_scoped_for_hail(credentials_file: Optional[str] = None) -> Optional[CloudCredentials]: scopes: Optional[List[str]] spec = load_identity_spec() @@ -100,6 +101,8 @@ def get_cloud_credentials_scoped_for_hail() -> Optional[CloudCredentials]: scopes = ['email', 'openid', 'profile'] if spec.oauth2_credentials is not None: return GoogleCredentials.from_credentials_data(spec.oauth2_credentials, scopes=scopes) + if credentials_file is not None: + return GoogleCredentials.from_file(credentials_file) return GoogleCredentials.default_credentials(scopes=scopes, anonymous_ok=False) assert spec.idp == IdentityProvider.MICROSOFT @@ -110,6 +113,9 @@ def get_cloud_credentials_scoped_for_hail() -> Optional[CloudCredentials]: scopes = [os.environ["HAIL_AZURE_OAUTH_SCOPE"]] else: scopes = None + + if credentials_file is not None: + return AzureCredentials.from_file(credentials_file, scopes=scopes) return AzureCredentials.default_credentials(scopes=scopes) diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index 922fdf93913..7e607c12074 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -850,7 +850,9 @@ async def create(billing_project: str, session: Optional[httpx.ClientSession] = None, headers: Optional[Dict[str, str]] = None, _token: Optional[str] = None, - token_file: Optional[str] = None): + token_file: Optional[str] = None, + *, + cloud_credentials_file: Optional[str] = None): if not deploy_config: deploy_config = get_deploy_config() url = deploy_config.base_url('batch') @@ -860,7 +862,7 @@ async def create(billing_project: str, if _token is not None: credentials = HailExplicitTokenCredentials(_token) else: - credentials = hail_credentials(tokens_file=token_file) + credentials = hail_credentials(tokens_file=token_file, cloud_credentials_file=cloud_credentials_file) return BatchClient( billing_project=billing_project, url=url, From 4fb659c1fcdd5a1f86cab8954218679f84c2f07e Mon Sep 17 00:00:00 2001 From: Dan King Date: Wed, 6 Sep 2023 10:01:37 -0400 Subject: [PATCH 02/15] [qob][batch] do not list all jobs on failure (plus: types!) (#13500) CHANGELOG: In Query-on-Batch, the client-side Python code will not try to list every job when a QoB batch fails. This could take hours for long-running pipelines or pipelines with many partitions. I also added API types to the list jobs end point because I have to go hunting for this every time anyway. Seems better to have this information at our digital fingertips. --- batch/batch/batch.py | 11 ++-- batch/batch/batch_format_version.py | 4 +- batch/batch/front_end/front_end.py | 54 +++++++++++-------- hail/python/hail/backend/service_backend.py | 10 +++- hail/python/hailtop/batch_client/__init__.py | 3 +- hail/python/hailtop/batch_client/aioclient.py | 47 ++++++++++++---- hail/python/hailtop/batch_client/types.py | 43 +++++++++++++++ hail/python/hailtop/hailctl/batch/cli.py | 9 +++- hail/python/hailtop/utils/utils.py | 2 +- 9 files changed, 137 insertions(+), 46 deletions(-) create mode 100644 hail/python/hailtop/batch_client/types.py diff --git a/batch/batch/batch.py b/batch/batch/batch.py index fcfc4e41058..8eb5209a1e6 100644 --- a/batch/batch/batch.py +++ b/batch/batch/batch.py @@ -1,8 +1,9 @@ import json import logging -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from gear import transaction +from hailtop.batch_client.types import CostBreakdownEntry, JobListEntryV1Alpha from hailtop.utils import humanize_timedelta_msecs, time_msecs_str from .batch_format_version import BatchFormatVersion @@ -12,7 +13,7 @@ log = logging.getLogger('batch') -def cost_breakdown_to_dict(cost_breakdown: dict): +def cost_breakdown_to_dict(cost_breakdown: Dict[str, float]) -> List[CostBreakdownEntry]: return [{'resource': resource, 'cost': cost} for resource, cost in cost_breakdown.items()] @@ -75,7 +76,7 @@ def _time_msecs_str(t): return d -def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> Dict[str, Any]: +def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> JobListEntryV1Alpha: format_version = BatchFormatVersion(record['format_version']) db_status = record['status'] @@ -89,7 +90,7 @@ def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> Dict[str, if record['cost_breakdown'] is not None: record['cost_breakdown'] = cost_breakdown_to_dict(json.loads(record['cost_breakdown'])) - result = { + return { 'batch_id': record['batch_id'], 'job_id': record['job_id'], 'name': name, @@ -103,8 +104,6 @@ def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> Dict[str, 'cost_breakdown': record['cost_breakdown'], } - return result - async def cancel_batch_in_db(db, batch_id): @transaction(db) diff --git a/batch/batch/batch_format_version.py b/batch/batch/batch_format_version.py index 088885ef312..06c8dc07f2a 100644 --- a/batch/batch/batch_format_version.py +++ b/batch/batch/batch_format_version.py @@ -1,3 +1,5 @@ +from typing import Optional, Tuple + from hailtop.batch_client.aioclient import Job @@ -117,7 +119,7 @@ def db_status(self, status): return [ec, duration] - def get_status_exit_code_duration(self, status): + def get_status_exit_code_duration(self, status) -> Tuple[Optional[int], Optional[int]]: if self.format_version == 1: job_status = {'status': status} return (Job.exit_code(job_status), Job.total_duration_msecs(job_status)) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 68fd981df8c..ed111e36213 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -11,7 +11,7 @@ import traceback from functools import wraps from numbers import Number -from typing import Any, Awaitable, Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union +from typing import Any, Awaitable, Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union, cast import aiohttp import aiohttp.web_exceptions @@ -45,6 +45,7 @@ from gear.profiling import install_profiler_if_requested from hailtop import aiotools, dictfix, httpx, version from hailtop.batch_client.parse import parse_cpu_in_mcpu, parse_memory_in_bytes, parse_storage_in_bytes +from hailtop.batch_client.types import GetJobResponseV1Alpha, GetJobsResponseV1Alpha, JobListEntryV1Alpha from hailtop.config import get_deploy_config from hailtop.hail_logging import AccessLogger from hailtop.tls import internal_server_ssl_context @@ -252,7 +253,9 @@ async def _handle_api_error(f: Callable[P, Awaitable[T]], *args: P.args, **kwarg raise e.http_response() -async def _query_batch_jobs(request: web.Request, batch_id: int, version: int, q: str, last_job_id: Optional[int]): +async def _query_batch_jobs( + request: web.Request, batch_id: int, version: int, q: str, last_job_id: Optional[int] +) -> Tuple[List[JobListEntryV1Alpha], Optional[int]]: db: Database = request.app['db'] if version == 1: sql, sql_args = parse_batch_jobs_query_v1(batch_id, q, last_job_id) @@ -270,7 +273,9 @@ async def _query_batch_jobs(request: web.Request, batch_id: int, version: int, q return (jobs, last_job_id) -async def _get_jobs(request, batch_id: int, version: int, q: str, last_job_id: Optional[int]): +async def _get_jobs( + request: web.Request, batch_id: int, version: int, q: str, last_job_id: Optional[int] +) -> GetJobsResponseV1Alpha: db = request.app['db'] record = await db.select_and_fetchone( @@ -1739,7 +1744,7 @@ async def ui_batches(request: web.Request, userdata: UserData) -> web.Response: return await render_template('batch', request, userdata, 'batches.html', page_context) -async def _get_job(app, batch_id, job_id): +async def _get_job(app, batch_id, job_id) -> GetJobResponseV1Alpha: db: Database = app['db'] record = await db.select_and_fetchone( @@ -1786,9 +1791,11 @@ async def _get_job(app, batch_id, job_id): _get_full_job_status(app, record), _get_full_job_spec(app, record), _get_attributes(app, record) ) - job = job_record_to_dict(record, attributes.get('name')) - job['status'] = full_status - job['spec'] = full_spec + job: GetJobResponseV1Alpha = { + **job_record_to_dict(record, attributes.get('name')), + 'status': full_status, + 'spec': full_spec, + } if attributes: job['attributes'] = attributes return job @@ -2072,6 +2079,8 @@ async def ui_get_job(request, userdata, batch_id): _get_job_resource_usage(app, batch_id, job_id), ) + job = cast(Dict[str, Any], job) + job['duration'] = humanize_timedelta_msecs(job['duration']) job['cost'] = cost_str(job['cost']) @@ -2131,21 +2140,22 @@ async def ui_get_job(request, userdata, batch_id): non_io_storage_limit_bytes = None memory_limit_bytes = None - resources = job_specification['resources'] - if 'memory_bytes' in resources: - memory_limit_bytes = resources['memory_bytes'] - resources['actual_memory'] = humanize.naturalsize(memory_limit_bytes, binary=True) - del resources['memory_bytes'] - if 'storage_gib' in resources: - io_storage_limit_bytes = resources['storage_gib'] * 1024**3 - resources['actual_storage'] = humanize.naturalsize(io_storage_limit_bytes, binary=True) - del resources['storage_gib'] - if 'cores_mcpu' in resources: - cores = resources['cores_mcpu'] / 1000 - non_io_storage_limit_gb = min(cores * RESERVED_STORAGE_GB_PER_CORE, RESERVED_STORAGE_GB_PER_CORE) - non_io_storage_limit_bytes = int(non_io_storage_limit_gb * 1024**3 + 1) - resources['actual_cpu'] = cores - del resources['cores_mcpu'] + if job_specification is not None: + resources = job_specification['resources'] + if 'memory_bytes' in resources: + memory_limit_bytes = resources['memory_bytes'] + resources['actual_memory'] = humanize.naturalsize(memory_limit_bytes, binary=True) + del resources['memory_bytes'] + if 'storage_gib' in resources: + io_storage_limit_bytes = resources['storage_gib'] * 1024**3 + resources['actual_storage'] = humanize.naturalsize(io_storage_limit_bytes, binary=True) + del resources['storage_gib'] + if 'cores_mcpu' in resources: + cores = resources['cores_mcpu'] / 1000 + non_io_storage_limit_gb = min(cores * RESERVED_STORAGE_GB_PER_CORE, RESERVED_STORAGE_GB_PER_CORE) + non_io_storage_limit_bytes = int(non_io_storage_limit_gb * 1024**3 + 1) + resources['actual_cpu'] = cores + del resources['cores_mcpu'] # Not all logs will be proper utf-8 but we attempt to show them as # str or else Jinja will present them surrounded by b'' diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index 3a82a0df61c..4478e2efe85 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -493,7 +493,10 @@ async def _read_output(self, ir: Optional[BaseIR], output_uri: str, input_uri: s except FileNotFoundError as exc: raise FatalError('Hail internal error. Please contact the Hail team and provide the following information.\n\n' + yamlx.dump({ 'service_backend_debug_info': self.debug_info(), - 'batch_debug_info': await self._batch.debug_info(), + 'batch_debug_info': await self._batch.debug_info( + _jobs_query_string='failed', + _max_jobs=10 + ), 'input_uri': await self._async_fs.read(input_uri), })) from exc @@ -514,7 +517,10 @@ async def _read_output(self, ir: Optional[BaseIR], output_uri: str, input_uri: s except UnexpectedEOFError as exc: raise FatalError('Hail internal error. Please contact the Hail team and provide the following information.\n\n' + yamlx.dump({ 'service_backend_debug_info': self.debug_info(), - 'batch_debug_info': await self._batch.debug_info(), + 'batch_debug_info': await self._batch.debug_info( + _jobs_query_string='failed', + _max_jobs=10 + ), 'in': await self._async_fs.read(input_uri), 'out': await self._async_fs.read(output_uri), })) from exc diff --git a/hail/python/hailtop/batch_client/__init__.py b/hail/python/hailtop/batch_client/__init__.py index 60de0b330b8..f9219abf05d 100644 --- a/hail/python/hailtop/batch_client/__init__.py +++ b/hail/python/hailtop/batch_client/__init__.py @@ -1,4 +1,4 @@ -from . import client, aioclient, parse +from . import client, aioclient, parse, types from .aioclient import BatchAlreadyCreatedError, BatchNotCreatedError, JobAlreadySubmittedError, JobNotSubmittedError __all__ = [ @@ -9,4 +9,5 @@ 'client', 'aioclient', 'parse', + 'types', ] diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index 7e607c12074..7697476b8fe 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -1,4 +1,4 @@ -from typing import AsyncIterator, Optional, Dict, Any, List, Tuple, Union +from typing import Optional, Dict, Any, List, Tuple, Union, AsyncIterator, TypedDict, cast import math import random import logging @@ -17,6 +17,7 @@ from hailtop.utils.rich_progress_bar import is_notebook, BatchProgressBar, BatchProgressBarTask from hailtop import httpx +from .types import GetJobsResponseV1Alpha, JobListEntryV1Alpha, GetJobResponseV1Alpha from .globals import tasks, complete_states log = logging.getLogger('batch_client.aioclient') @@ -173,7 +174,7 @@ def _get_duration(container_status): return sum(durations) # type: ignore @staticmethod - def submitted_job(batch: 'Batch', job_id: int, _status: Optional[dict] = None): + def submitted_job(batch: 'Batch', job_id: int, _status: Optional[GetJobResponseV1Alpha] = None): return Job(batch, AbsoluteJobId(job_id), _status=_status) @staticmethod @@ -184,7 +185,7 @@ def __init__(self, batch: 'Batch', job_id: Union[AbsoluteJobId, InUpdateJobId], *, - _status: Optional[dict] = None): + _status: Optional[GetJobResponseV1Alpha] = None): self._batch = batch self._job_id = job_id self._status = _status @@ -227,7 +228,9 @@ async def attributes(self): if not self._status: await self.status() assert self._status is not None - return self._status['attributes'] + if 'attributes' in self._status: + return self._status['attributes'] + return {} async def _is_job_in_state(self, states): await self.status() @@ -267,9 +270,12 @@ async def status(self) -> Dict[str, Any]: return self._status async def wait(self) -> Dict[str, Any]: - return await self._wait_for_states(*complete_states) + return cast( + Dict[str, Any], # https://stackoverflow.com/a/76515675/6823256 + await self._wait_for_states(*complete_states) + ) - async def _wait_for_states(self, *states: str) -> Dict[str, Any]: + async def _wait_for_states(self, *states: str) -> GetJobResponseV1Alpha: tries = 0 while True: if await self._is_job_in_state(states) or await self.is_complete(): @@ -307,6 +313,10 @@ class BatchAlreadyCreatedError(Exception): pass +class BatchDebugInfo(TypedDict): + status: Dict[str, Any] + jobs: List[JobListEntryV1Alpha] + class Batch: def __init__(self, client: 'BatchClient', @@ -356,7 +366,10 @@ async def cancel(self): self._raise_if_not_created() await self._client._patch(f'/api/v1alpha/batches/{self.id}/cancel') - async def jobs(self, q: Optional[str] = None, version: Optional[int] = None) -> AsyncIterator[Dict[str, Any]]: + async def jobs(self, + q: Optional[str] = None, + version: Optional[int] = None + ) -> AsyncIterator[JobListEntryV1Alpha]: self._raise_if_not_created() if version is None: version = 1 @@ -368,7 +381,10 @@ async def jobs(self, q: Optional[str] = None, version: Optional[int] = None) -> if last_job_id is not None: params['last_job_id'] = last_job_id resp = await self._client._get(f'/api/v{version}alpha/batches/{self.id}/jobs', params=params) - body = await resp.json() + body = cast( + GetJobsResponseV1Alpha, + await resp.json() + ) for job in body['jobs']: yield job last_job_id = body.get('last_job_id') @@ -463,11 +479,17 @@ async def wait(self, with BatchProgressBar(disable=disable_progress_bar) as progress2: return await self._wait(description, progress2, disable_progress_bar, starting_job) - async def debug_info(self): + async def debug_info(self, + _jobs_query_string: Optional[str] = None, + _max_jobs: Optional[int] = None, + ) -> BatchDebugInfo: self._raise_if_not_created() batch_status = await self.status() jobs = [] - async for j_status in self.jobs(): + async for j_status in self.jobs(q=_jobs_query_string): + if _max_jobs and len(jobs) == _max_jobs: + break + id = j_status['job_id'] log, job = await asyncio.gather(self.get_job_log(id), self.get_job(id)) jobs.append({'log': log, 'status': job._status}) @@ -926,7 +948,10 @@ async def list_batches(self, q=None, last_batch_id=None, limit=2 ** 64, version= async def get_job(self, batch_id, job_id): b = await self.get_batch(batch_id) j_resp = await self._get(f'/api/v1alpha/batches/{batch_id}/jobs/{job_id}') - j = await j_resp.json() + j = cast( + GetJobResponseV1Alpha, + await j_resp.json() + ) return Job.submitted_job(b, j['job_id'], _status=j) async def get_job_log(self, batch_id, job_id) -> Dict[str, Any]: diff --git a/hail/python/hailtop/batch_client/types.py b/hail/python/hailtop/batch_client/types.py new file mode 100644 index 00000000000..2697cb34ee6 --- /dev/null +++ b/hail/python/hailtop/batch_client/types.py @@ -0,0 +1,43 @@ +from typing import TypedDict, Literal, Optional, List, Any, Dict +from typing_extensions import NotRequired + + +class CostBreakdownEntry(TypedDict): + resource: str + cost: float + + +class GetJobResponseV1Alpha(TypedDict): + batch_id: int + job_id: int + name: Optional[str] + user: str + billing_project: str + state: Literal['Pending', 'Ready', 'Creating', 'Running', 'Failed', 'Cancelled', 'Error', 'Success'] + exit_code: Optional[int] + duration: Optional[int] + cost: Optional[float] + msec_mcpu: int + cost_breakdown: List[CostBreakdownEntry] + status: Optional[Dict[str, Any]] + spec: Optional[Dict[str, Any]] + attributes: NotRequired[Dict[str, str]] + + +class JobListEntryV1Alpha(TypedDict): + batch_id: int + job_id: int + name: Optional[str] + user: str + billing_project: str + state: Literal['Pending', 'Ready', 'Creating', 'Running', 'Failed', 'Cancelled', 'Error', 'Success'] + exit_code: Optional[int] + duration: Optional[int] + cost: Optional[float] + msec_mcpu: int + cost_breakdown: List[CostBreakdownEntry] + + +class GetJobsResponseV1Alpha(TypedDict): + jobs: List[JobListEntryV1Alpha] + last_job_id: NotRequired[int] diff --git a/hail/python/hailtop/hailctl/batch/cli.py b/hail/python/hailtop/hailctl/batch/cli.py index 09bd5a740b9..c8e0017f9f3 100644 --- a/hail/python/hailtop/hailctl/batch/cli.py +++ b/hail/python/hailtop/hailctl/batch/cli.py @@ -4,7 +4,7 @@ from typer import Option as Opt, Argument as Arg import json -from typing import Optional, List, Annotated as Ann +from typing import Optional, List, Annotated as Ann, cast, Dict, Any from . import list_batches from . import billing @@ -146,7 +146,12 @@ def job(batch_id: int, job_id: int, output: StructuredFormatOption = StructuredF if job is not None: assert job._status - print(make_formatter(output)([job._status])) + print(make_formatter(output)([ + cast( + Dict[str, Any], # https://stackoverflow.com/q/71986632/6823256 + job._status + ) + ])) else: print(f"Job with ID {job_id} on batch {batch_id} not found") diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index ba0b0eea65d..574ce4646c0 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -74,7 +74,7 @@ def first_extant_file(*files: Optional[str]) -> Optional[str]: return None -def cost_str(cost: Optional[int]) -> Optional[str]: +def cost_str(cost: Optional[float]) -> Optional[str]: if cost is None: return None if cost == 0.0: From 7f6551674eec478df23450b941e9f4f872893d75 Mon Sep 17 00:00:00 2001 From: Patrick Schultz Date: Wed, 6 Sep 2023 11:13:26 -0400 Subject: [PATCH 03/15] =?UTF-8?q?[compiler,=20lir]=20don=E2=80=99t=20compu?= =?UTF-8?q?te=20unused=20loop=20regions=20(#13566)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the code computing loop regions in lir. They aren't currently being used, and the recursive function is causing stack overflows. --- hail/src/main/scala/is/hail/lir/PST.scala | 40 ------------------- .../main/scala/is/hail/lir/SplitMethod.scala | 1 + 2 files changed, 1 insertion(+), 40 deletions(-) diff --git a/hail/src/main/scala/is/hail/lir/PST.scala b/hail/src/main/scala/is/hail/lir/PST.scala index 90e7fa17825..de57b4507b1 100644 --- a/hail/src/main/scala/is/hail/lir/PST.scala +++ b/hail/src/main/scala/is/hail/lir/PST.scala @@ -481,52 +481,13 @@ class PSTBuilder( val newBlocks = new Blocks(newBlocksB.result()) val newRegions = newRegionsB.result() - val nNewRegions = newRegions.length val newRoot = regionNewRegion(root) val newCFG = CFG(m, newBlocks) - val loopRegion = new java.util.BitSet(nNewRegions) - - // returns sources of backedges that end in region i - // but are not contained in region i - def findLoopRegions(i: Int): Array[Int] = { - val r = newRegions(i) - val backEdgeSourcesB = new IntArrayBuilder() - if (r.children.nonEmpty) { - var c = 0 - while (c < r.children.length) { - val ci = r.children(c) - val childBackEdgeSources = findLoopRegions(ci) - var j = 0 - while (j < childBackEdgeSources.length) { - val s = childBackEdgeSources(j) - if (s <= r.end) - loopRegion.set(i) - else - backEdgeSourcesB += s - j += 1 - } - c += 1 - } - } else { - assert(r.start == r.end) - for (s <- newCFG.pred(r.start)) { - if (s == r.end) - loopRegion.set(i) - else if (s > r.end) - backEdgeSourcesB += s - } - } - backEdgeSourcesB.result() - } - - findLoopRegions(newRoot) - val pst = new PST( newSplitBlock.result(), newRegions, - loopRegion, newRoot) new PSTResult(newBlocks, newCFG, pst) } @@ -542,7 +503,6 @@ object PST { class PST( val splitBlock: Array[Boolean], val regions: Array[PSTRegion], - val loopRegion: java.util.BitSet, val root: Int ) { def nBlocks: Int = splitBlock.length diff --git a/hail/src/main/scala/is/hail/lir/SplitMethod.scala b/hail/src/main/scala/is/hail/lir/SplitMethod.scala index 37e8dea354d..1a89ee9a4fa 100644 --- a/hail/src/main/scala/is/hail/lir/SplitMethod.scala +++ b/hail/src/main/scala/is/hail/lir/SplitMethod.scala @@ -526,6 +526,7 @@ class SplitMethod( assert(r.start == r.end) regionSize(i) = blockSize(blockPartitions.find(r.start)) } + // The PST no longer computes loop regions. See PR #13566 for the removed code. /* if (i != pst.root && pst.loopRegion.get(i)) { splitSlice(r.start, r.end) From 7343e9c368dcb5e57677108087d468f7e45a68f5 Mon Sep 17 00:00:00 2001 From: Dan King Date: Wed, 6 Sep 2023 12:52:07 -0400 Subject: [PATCH 04/15] [release] 0.2.121 (#13529) I also had to fix issues that pyright discovered. --- hail/Makefile | 2 +- hail/python/hail/docs/change_log.md | 58 +++++++++++++++++++ hail/python/hailtop/batch/docs/change_log.rst | 4 ++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/hail/Makefile b/hail/Makefile index f4d1764016a..3eeb4f1667c 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -14,7 +14,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.12.15 SPARK_VERSION ?= 3.3.0 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 120 +HAIL_PATCH_VERSION := 121 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 7 diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index 346fcc7d0ad..f6e693d5b45 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -52,6 +52,64 @@ supports. policy. Their functionality or even existence may change without notice. Please contact us if you critically depend on experimental functionality.** +## Version 0.2.121 + +Released 2023-08-31 + +### New Features + +- (hail#13385) The VDS combiner now supports arbitrary custom call fields via the `call_fields` + parameter. +- (hail#13224) `hailctl config get`, `set`, and `unset` now support shell auto-completion. Run + `hailctl --install-completion zsh` to install the auto-completion for `zsh`. You must already have + completion enabled for `zsh`. +- (hail#13279) Add `hailctl batch init` which helps new users interactively set up `hailctl` for + Query-on-Batch and Batch use. + +### Bug Fixes +- (hail#13327) Fix (hail#12936) in which VEP frequently failed (due to Docker not starting up) on + clusters with a non-trivial number of workers. +- (hail#13485) Fix (hail#13479) in which `hl.vds.local_to_global` could produce invalid values when + the LA field is too short. There were and are no issues when the LA field has the correct length. +- (hail#13340) Fix `copy_log` to correctly copy relative file paths. +- (hail#13364) `hl.import_gvcf_interval` now treats `PGT` as a call field. +- (hail#13333) Fix interval filtering regression: `filter_rows` or `filter` mentioning the same + field twice or using two fields incorrectly read the entire dataset. In 0.2.121, these filters + will correctly read only the relevant subset of the data. +- (hail#13368) In Azure, Hail now uses fewer "list blobs" operations. This should reduce cost on + pipelines that import many files, export many of files, or use file glob expressions. +- (hail#13414) Resolves (hail#13407) in which uses of `union_rows` could reduce parallelism to one + partition resulting in severely degraded performance. +- (hail#13405) `MatrixTable.aggregate_cols` no longer forces a distributed computation. This should + be what you want in the majority of cases. In case you know the aggregation is very slow and + should be parallelized, use `mt.cols().aggregate` instead. +- (hail#13460) In Query-on-Spark, restore `hl.read_table` optimization that avoids reading + unnecessary data in pipelines that do not reference row fields. +- (hail#13447) Fix (hail#13446). In all three submit commands (`batch`, `dataproc`, and + `hdinsight`), Hail now allows and encourages the use of -- to separate arguments meant for the + user script from those meant for hailctl. In hailctl batch submit, option-like arguments, for + example "--foo", are now supported before "--" if and only if they do not conflict with a hailctl + option. +- (hail#13422) `hailtop.hail_frozenlist.frozenlist` now has an eval-able `repr`. +- (hail#13523) `hl.Struct` is now pickle-able. +- (hail#13505) Fix bug introduced in 0.2.117 by commit `c9de81108` which prevented the passing of + keyword arguments to Python jobs. This manifested as "ValueError: too many values to unpack". +- (hail#13536) Fixed (hail#13535) which prevented the use of Python jobs when the client (e.g. your + laptop) Python version is 3.11 or later. +- (hail#13434) In QoB, Hail's file systems now correctly list all files in a directory, not just the + first 1000. This could manifest in an `import_table` or `import_vcf` which used a glob + expression. In such a case, only the first 1000 files would have been included in the resulting + Table or MatrixTable. +- (hail#13550) `hl.utils.range_table(n)` now supports all valid 32-bit signed integer values of `n`. +- (hail#13500) In Query-on-Batch, the client-side Python code will not try to list every job when a + QoB batch fails. This could take hours for long-running pipelines or pipelines with many + partitions. + + +### Deprecations + +- (hail#13275) Hail no longer officially supports Python 3.8. + ## Version 0.2.120 Released 2023-07-20 diff --git a/hail/python/hailtop/batch/docs/change_log.rst b/hail/python/hailtop/batch/docs/change_log.rst index 4817e4dc0d2..c15e9a8ec4d 100644 --- a/hail/python/hailtop/batch/docs/change_log.rst +++ b/hail/python/hailtop/batch/docs/change_log.rst @@ -15,6 +15,10 @@ versions. In particular, Hail officially supports: Change Log ========== +**Version 0.2.121** + +- (`#13396 `__) Non-spot instances can be requested via the :meth:`.Job.spot` method. + **Version 0.2.117** - (`#13007 `__) Memory and storage request strings may now be optionally terminated with a `B` for bytes. From e0bf6e644fb33058d102d8c5b124f949c0506f63 Mon Sep 17 00:00:00 2001 From: Dan King Date: Wed, 6 Sep 2023 14:04:40 -0400 Subject: [PATCH 05/15] [query] MT.tail should prefer `n_rows` to `n`. (#13508) CHANGELOG: The `n` parameter of `MatrixTable.tail` is deprecated in favor of a new `n_rows` parameter. MT.head already does this. --- hail/python/hail/matrixtable.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/hail/python/hail/matrixtable.py b/hail/python/hail/matrixtable.py index 496293897bf..e2f027f7938 100644 --- a/hail/python/hail/matrixtable.py +++ b/hail/python/hail/matrixtable.py @@ -17,6 +17,7 @@ from hail.utils.misc import wrap_to_tuple, \ get_key_by_exprs, \ get_select_exprs, check_annotate_exprs, process_joins +import warnings class GroupedMatrixTable(ExprContainer): @@ -4033,8 +4034,10 @@ def head(self, n_rows: Optional[int], n_cols: Optional[int] = None, *, n: Option if n_rows is not None: n_rows_name = 'n_rows' else: + warnings.warn("MatrixTable.head: the 'n' parameter is deprecated in favor of 'n_rows'.") n_rows = n n_rows_name = 'n' + del n mt = self if n_rows is not None: @@ -4047,8 +4050,8 @@ def head(self, n_rows: Optional[int], n_cols: Optional[int] = None, *, n: Option mt = MatrixTable(ir.MatrixColsHead(mt._mir, n_cols)) return mt - @typecheck_method(n=nullable(int), n_cols=nullable(int)) - def tail(self, n: Optional[int], n_cols: Optional[int] = None) -> 'MatrixTable': + @typecheck_method(n_rows=nullable(int), n_cols=nullable(int), n=nullable(int)) + def tail(self, n_rows: Optional[int], n_cols: Optional[int] = None, *, n: Optional[int] = None) -> 'MatrixTable': """Subset matrix to last `n` rows. Examples @@ -4087,21 +4090,34 @@ def tail(self, n: Optional[int], n_cols: Optional[int] = None) -> 'MatrixTable': Parameters ---------- - n : :obj:`int` + n_rows : :obj:`int` Number of rows to include (all rows included if ``None``). n_cols : :obj:`int`, optional Number of cols to include (all cols included if ``None``). + n : :obj:`int` + Deprecated in favor of n_rows. Returns ------- :class:`.MatrixTable` Matrix including the last `n` rows and last `n_cols` cols. """ + if n_rows is not None and n is not None: + raise ValueError('Both n and n_rows specified. Only one may be specified.') + + if n_rows is not None: + n_rows_name = 'n_rows' + else: + warnings.warn("MatrixTable.tail: the 'n' parameter is deprecated in favor of 'n_rows'.") + n_rows = n + n_rows_name = 'n' + del n + mt = self - if n is not None: - if n < 0: - raise ValueError(f"MatrixTable.tail: expect 'n' to be non-negative or None, found '{n}'") - mt = MatrixTable(ir.MatrixRowsTail(mt._mir, n)) + if n_rows is not None: + if n_rows < 0: + raise ValueError(f"MatrixTable.tail: expect '{n_rows_name}' to be non-negative or None, found '{n_rows}'") + mt = MatrixTable(ir.MatrixRowsTail(mt._mir, n_rows)) if n_cols is not None: if n_cols < 0: raise ValueError(f"MatrixTable.tail: expect 'n_cols' to be non-negative or None, found '{n_cols}'") From 56f62313f76f4115e9d1bf97e27c797157193ad1 Mon Sep 17 00:00:00 2001 From: Dan King Date: Wed, 6 Sep 2023 18:37:39 -0400 Subject: [PATCH 06/15] [dataproc] restore parallel fetching of resources (#13574) This appers to have been lost in #8268. There is no explicit mention of it and it was inconsistently removed (e.g. loftee in GRCh38 still has the `&`). I assume it was a mistake. --- hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh | 4 ++-- hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh index 4d727eb8894..e8c55a927e8 100644 --- a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh +++ b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh @@ -28,8 +28,8 @@ apt-get install -y --allow-unauthenticated docker-ce gcloud storage cp --billing-project $PROJECT gs://hail-us-vep/vep85-loftee-gcloud.json /vep_data/vep85-gcloud.json ln -s /vep_data/vep85-gcloud.json $VEP_CONFIG_PATH -gcloud storage cat --billing-project $PROJECT gs://${VEP_BUCKET}/loftee-beta/${ASSEMBLY}.tar | tar -xf - -C /vep_data -gcloud storage cat --billing-project $PROJECT gs://${VEP_BUCKET}/homo-sapiens/85_${ASSEMBLY}.tar | tar -xf - -C /vep_data/homo_sapiens +gcloud storage cat --billing-project $PROJECT gs://${VEP_BUCKET}/loftee-beta/${ASSEMBLY}.tar | tar -xf - -C /vep_data & +gcloud storage cat --billing-project $PROJECT gs://${VEP_BUCKET}/homo-sapiens/85_${ASSEMBLY}.tar | tar -xf - -C /vep_data/homo_sapiens & docker pull ${VEP_DOCKER_IMAGE} & wait diff --git a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh index 991b4956e2a..8fd7022f56e 100644 --- a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh +++ b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh @@ -29,7 +29,7 @@ gcloud storage cp --billing-project $PROJECT gs://hail-us-vep/vep95-GRCh38-lofte ln -s /vep_data/vep95-GRCh38-gcloud.json $VEP_CONFIG_PATH gcloud storage cat --billing-project $PROJECT gs://${VEP_BUCKET}/loftee-beta/${ASSEMBLY}.tar | tar -xf - -C /vep_data/ & -gcloud storage cat --billing-project $PROJECT gs://${VEP_BUCKET}/homo-sapiens/95_${ASSEMBLY}.tar | tar -xf - -C /vep_data/homo_sapiens +gcloud storage cat --billing-project $PROJECT gs://${VEP_BUCKET}/homo-sapiens/95_${ASSEMBLY}.tar | tar -xf - -C /vep_data/homo_sapiens & docker pull ${VEP_DOCKER_IMAGE} & wait From 35da6f47ddcd1345ac2411b921c46f825fc31ae2 Mon Sep 17 00:00:00 2001 From: Dan King Date: Wed, 6 Sep 2023 18:59:28 -0400 Subject: [PATCH 07/15] [test-dataproc] make test-dataproc-37 and -38 not race (#13573) Consider, for example, this deploy: https://ci.hail.is/batches/7956812. `test-dataproc-37` succeeded but `test-dataproc-38` failed (it timed out b/c the master failed to come online). You can see the error logs for the cluster here: https://cloudlogging.app.goo.gl/t1ux8oqy11Ba2dih7 It states a certain file either did not exist or we did not have permission to access it. [`test_dataproc-37`](https://batch.hail.is/batches/7956812/jobs/193) and [`test_dataproc-38`](https://batch.hail.is/batches/7956812/jobs/194) started around the same time and both uploaded four files into: gs://hail-30-day/hailctl/dataproc/ci_test_dataproc/0.2.121-7343e9c368dc/ And then set it to public read/write. The public read/write means that permissions are not the issue. Instead, the issue is that there must be some sort of race condition in GCS which means that if you "patch" (aka overwrite) an existing file, it is possible that a concurrent reader will see the file as not existing. Unfortunately, I cannot confirm this with audit logs of the writes and read because [public objects do not generate audit logs](https://cloud.google.com/logging/docs/audit#data-access). > Publicly available resources that have the Identity and Access Management policies [allAuthenticatedUsers](https://cloud.google.com/iam/docs/overview#allauthenticatedusers) or [allUsers](https://cloud.google.com/iam/docs/overview#allusers) don't generate audit logs. Resources that can be accessed without logging into a Google Cloud, Google Workspace, Cloud Identity, or Drive Enterprise account don't generate audit logs. This helps protect end-user identities and information. --- build.yaml | 4 ++-- hail/python/hail/docs/change_log.md | 5 ++++- .../hailtop/hailctl/dataproc/resources/vep-GRCh37.sh | 6 ++++++ .../hailtop/hailctl/dataproc/resources/vep-GRCh38.sh | 8 ++++++-- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/build.yaml b/build.yaml index d909a3b3ac2..388921695d0 100644 --- a/build.yaml +++ b/build.yaml @@ -3210,7 +3210,7 @@ steps: cd hail chmod 755 ./gradlew time retry ./gradlew --version - make test-dataproc-37 DEV_CLARIFIER=ci_test_dataproc + make test-dataproc-37 DEV_CLARIFIER=ci_test_dataproc-37 dependsOn: - ci_utils_image - default_ns @@ -3252,7 +3252,7 @@ steps: cd hail chmod 755 ./gradlew time retry ./gradlew --version - make test-dataproc-38 DEV_CLARIFIER=ci_test_dataproc + make test-dataproc-38 DEV_CLARIFIER=ci_test_dataproc-38 dependsOn: - ci_utils_image - default_ns diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index f6e693d5b45..ad76c6fe136 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -67,7 +67,7 @@ Released 2023-08-31 Query-on-Batch and Batch use. ### Bug Fixes -- (hail#13327) Fix (hail#12936) in which VEP frequently failed (due to Docker not starting up) on +- (hail#13573) Fix (hail#12936) in which VEP frequently failed (due to Docker not starting up) on clusters with a non-trivial number of workers. - (hail#13485) Fix (hail#13479) in which `hl.vds.local_to_global` could produce invalid values when the LA field is too short. There were and are no issues when the LA field has the correct length. @@ -109,6 +109,9 @@ Released 2023-08-31 ### Deprecations - (hail#13275) Hail no longer officially supports Python 3.8. +- (hail#13508) The `n` parameter of `MatrixTable.tail` is deprecated in favor of a new `n_rows` + parameter. + ## Version 0.2.120 diff --git a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh index e8c55a927e8..c46d66fb947 100644 --- a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh +++ b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh37.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -x + export PROJECT="$(gcloud config get-value project)" export ASSEMBLY=GRCh37 export VEP_CONFIG_PATH="$(/usr/share/google/get_metadata_value attributes/VEP_CONFIG_PATH)" @@ -24,6 +26,10 @@ sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debi apt-get update apt-get install -y --allow-unauthenticated docker-ce +# https://github.com/hail-is/hail/issues/12936 +sleep 60 +sudo service docker restart + # Get VEP cache and LOFTEE data gcloud storage cp --billing-project $PROJECT gs://hail-us-vep/vep85-loftee-gcloud.json /vep_data/vep85-gcloud.json ln -s /vep_data/vep85-gcloud.json $VEP_CONFIG_PATH diff --git a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh index 8fd7022f56e..c6711157de9 100644 --- a/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh +++ b/hail/python/hailtop/hailctl/dataproc/resources/vep-GRCh38.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -x + export PROJECT="$(gcloud config get-value project)" export VEP_CONFIG_PATH="$(/usr/share/google/get_metadata_value attributes/VEP_CONFIG_PATH)" export VEP_REPLICATE="$(/usr/share/google/get_metadata_value attributes/VEP_REPLICATE)" @@ -24,6 +26,10 @@ sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debi apt-get update apt-get install -y --allow-unauthenticated docker-ce +# https://github.com/hail-is/hail/issues/12936 +sleep 60 +sudo service docker restart + # Get VEP cache and LOFTEE data gcloud storage cp --billing-project $PROJECT gs://hail-us-vep/vep95-GRCh38-loftee-gcloud.json /vep_data/vep95-GRCh38-gcloud.json ln -s /vep_data/vep95-GRCh38-gcloud.json $VEP_CONFIG_PATH @@ -56,5 +62,3 @@ docker run -i -v /vep_data/:/opt/vep/.vep/:ro ${VEP_DOCKER_IMAGE} \ /opt/vep/src/ensembl-vep/vep "\$@" EOF chmod +x /vep.sh - -sudo service docker restart From b4912b57bbbcfcd35b427a6d851b2f392a5ee2a5 Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Wed, 6 Sep 2023 20:35:00 -0400 Subject: [PATCH 08/15] [ci] Clean up temporary config for OAuth changes (#13570) All this `HAIL_AZURE_OAUTH_SCOPE` and `HAIL_IDENTITY_PROVIDER_JSON` are now injected by batch workers by default so we don't need to specify them explicitly in `build.yaml`. Unrelatedly, I don't think `create_dummy_oauth2_client_secret` does anything as `auth-oauth2-client-secret` already exists inside new namespaces. --- build.yaml | 59 ------------------------------------------------------ 1 file changed, 59 deletions(-) diff --git a/build.yaml b/build.yaml index 388921695d0..106f43989fd 100644 --- a/build.yaml +++ b/build.yaml @@ -1604,22 +1604,6 @@ steps: - default_ns - create_certs - copy_third_party_images - - kind: runImage - name: create_dummy_oauth2_client_secret - resources: - memory: standard - cpu: '0.25' - image: - valueFrom: ci_utils_image.image - script: | - set -ex - kubectl -n {{ default_ns.name }} create secret generic auth-oauth2-client-secret || true - scopes: - - test - - dev - dependsOn: - - default_ns - - ci_utils_image - kind: deploy name: deploy_auth namespace: @@ -1638,7 +1622,6 @@ steps: - create_session_key - auth_database - auth_image - - create_dummy_oauth2_client_secret - create_certs - create_accounts - kind: runImage @@ -2387,25 +2370,12 @@ steps: export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - {% if global.cloud == "gcp" %} export GCS_REQUESTER_PAYS_PROJECT=broad-ctsa - {% elif global.cloud == "azure" %} - export HAIL_AZURE_SUBSCRIPTION_ID={{ global.azure_subscription_id }} - export HAIL_AZURE_RESOURCE_GROUP={{ global.azure_resource_group }} - export HAIL_AZURE_OAUTH_SCOPE=$(cat /oauth-secret/sp_oauth_scope) - {% endif %} export HAIL_SHUFFLE_MAX_BRANCH=4 export HAIL_SHUFFLE_CUTOFF=1000000 export HAIL_QUERY_BACKEND=batch - {% if global.cloud == "azure" %} - export HAIL_BATCH_REGIONS={{ global.azure_location }} - {% elif global.cloud == "gcp" %} export HAIL_BATCH_REGIONS={{ global.gcp_region }} - {% else %} - echo "unknown cloud {{ global.cloud }}" - exit 1 - {% endif %} export HAIL_BATCH_BILLING_PROJECT=test export HAIL_BATCH_REMOTE_TMPDIR={{ global.test_storage_uri }} @@ -2468,24 +2438,13 @@ steps: export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - {% if global.cloud == "gcp" %} - export GCS_REQUESTER_PAYS_PROJECT=broad-ctsa - {% elif global.cloud == "azure" %} export HAIL_AZURE_SUBSCRIPTION_ID={{ global.azure_subscription_id }} export HAIL_AZURE_RESOURCE_GROUP={{ global.azure_resource_group }} - {% endif %} export HAIL_SHUFFLE_MAX_BRANCH=4 export HAIL_SHUFFLE_CUTOFF=1000000 export HAIL_QUERY_BACKEND=batch - {% if global.cloud == "azure" %} export HAIL_BATCH_REGIONS={{ global.azure_location }} - {% elif global.cloud == "gcp" %} - export HAIL_BATCH_REGIONS={{ global.gcp_region }} - {% else %} - echo "unknown cloud {{ global.cloud }}" - exit 1 - {% endif %} export HAIL_BATCH_BILLING_PROJECT=test export HAIL_BATCH_REMOTE_TMPDIR={{ global.test_storage_uri }} @@ -2511,10 +2470,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: auth-oauth2-client-secret - namespace: - valueFrom: default_ns.name - mountPath: /oauth-secret dependsOn: - default_ns - merge_code @@ -2940,11 +2895,8 @@ steps: export HAIL_GENETICS_HAIL_IMAGE="{{ hailgenetics_hail_image.image }}" {% if global.cloud == "gcp" %} - export HAIL_IDENTITY_PROVIDER_JSON='{"idp": "Google"}' export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json {% elif global.cloud == "azure" %} - export HAIL_IDENTITY_PROVIDER_JSON='{"idp": "Microsoft"}' - export HAIL_AZURE_OAUTH_SCOPE=$(cat /oauth-secret/sp_oauth_scope) export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json {% else %} echo "unknown cloud {{ global.cloud }}" @@ -3039,10 +2991,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: auth-oauth2-client-secret - namespace: - valueFrom: default_ns.name - mountPath: /oauth-secret dependsOn: - hailgenetics_hail_image - upload_query_jar @@ -3548,9 +3496,6 @@ steps: export HAIL_CLOUD={{ global.cloud }} export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} - {% if global.cloud == "azure" %} - export HAIL_AZURE_OAUTH_SCOPE=$(cat /oauth-secret/sp_oauth_scope) - {% endif %} cd /io mkdir -p src/test @@ -3572,10 +3517,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: auth-oauth2-client-secret - namespace: - valueFrom: default_ns.name - mountPath: /oauth-secret timeout: 1200 dependsOn: - default_ns From 6cdd8f7e861f93d294c529a446a51a24385f5de1 Mon Sep 17 00:00:00 2001 From: Dan King Date: Wed, 6 Sep 2023 22:08:06 -0400 Subject: [PATCH 09/15] [deps] update rich (in query) and numpy (in batch) (#13572) --- batch/pinned-requirements.txt | 2 +- hail/python/dev/pinned-requirements.txt | 16 ++++++++-------- hail/python/hailtop/pinned-requirements.txt | 4 ++-- hail/python/hailtop/requirements.txt | 2 +- hail/python/pinned-requirements.txt | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/batch/pinned-requirements.txt b/batch/pinned-requirements.txt index 227558f7373..84064fd7a27 100644 --- a/batch/pinned-requirements.txt +++ b/batch/pinned-requirements.txt @@ -88,7 +88,7 @@ python-dateutil==2.8.2 # -c hail/batch/../hail/python/dev/pinned-requirements.txt # -c hail/batch/../hail/python/pinned-requirements.txt # pandas -pytz==2023.3 +pytz==2023.3.post1 # via # -c hail/batch/../hail/python/pinned-requirements.txt # pandas diff --git a/hail/python/dev/pinned-requirements.txt b/hail/python/dev/pinned-requirements.txt index cbb0b924223..4a7f1ba744f 100644 --- a/hail/python/dev/pinned-requirements.txt +++ b/hail/python/dev/pinned-requirements.txt @@ -16,7 +16,7 @@ arrow==1.2.3 # via isoduration astroid==2.15.6 # via pylint -asttokens==2.2.1 +asttokens==2.4.0 # via stack-data async-lru==2.0.4 # via jupyterlab @@ -130,7 +130,7 @@ importlib-resources==6.0.1 # via matplotlib iniconfig==2.0.0 # via pytest -ipykernel==6.25.1 +ipykernel==6.25.2 # via # jupyter # jupyter-console @@ -312,7 +312,7 @@ platformdirs==3.10.0 # virtualenv pluggy==1.3.0 # via pytest -pre-commit==3.3.3 +pre-commit==3.4.0 # via -r hail/hail/python/dev/requirements.txt prometheus-client==0.17.1 # via jupyter-server @@ -348,7 +348,7 @@ pyparsing==3.0.9 # via matplotlib pyright==1.1.325 # via -r hail/hail/python/dev/requirements.txt -pytest==7.4.0 +pytest==7.4.1 # via # -r hail/hail/python/dev/requirements.txt # pytest-asyncio @@ -395,7 +395,7 @@ pyzmq==25.1.1 # jupyter-console # jupyter-server # qtconsole -qtconsole==5.4.3 +qtconsole==5.4.4 # via jupyter qtpy==2.4.0 # via qtconsole @@ -417,11 +417,11 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.10.0 +rpds-py==0.10.2 # via # jsonschema # referencing -ruff==0.0.286 +ruff==0.0.287 # via -r hail/hail/python/dev/requirements.txt send2trash==1.8.2 # via jupyter-server @@ -436,7 +436,7 @@ sniffio==1.3.0 # via anyio snowballstemmer==2.2.0 # via sphinx -soupsieve==2.4.1 +soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 # via diff --git a/hail/python/hailtop/pinned-requirements.txt b/hail/python/hailtop/pinned-requirements.txt index 600dc7ae90f..fc25f1229c8 100644 --- a/hail/python/hailtop/pinned-requirements.txt +++ b/hail/python/hailtop/pinned-requirements.txt @@ -30,9 +30,9 @@ azure-mgmt-storage==20.1.0 # via -r hail/hail/python/hailtop/requirements.txt azure-storage-blob==12.17.0 # via -r hail/hail/python/hailtop/requirements.txt -boto3==1.28.39 +boto3==1.28.41 # via -r hail/hail/python/hailtop/requirements.txt -botocore==1.31.39 +botocore==1.31.41 # via # -r hail/hail/python/hailtop/requirements.txt # boto3 diff --git a/hail/python/hailtop/requirements.txt b/hail/python/hailtop/requirements.txt index 827a2351478..9ba716f9c2d 100644 --- a/hail/python/hailtop/requirements.txt +++ b/hail/python/hailtop/requirements.txt @@ -15,7 +15,7 @@ janus>=0.6,<1.1 nest_asyncio>=1.5.4,<2 orjson>=3.6.4,<4 protobuf==3.20.2 -rich==12.6.0 +rich>=12.6.0,<13 typer>=0.9.0,<1 python-json-logger>=2.0.2,<3 pyyaml>=6.0,<7.0 diff --git a/hail/python/pinned-requirements.txt b/hail/python/pinned-requirements.txt index 349faaeb1a0..de9bed19adf 100644 --- a/hail/python/pinned-requirements.txt +++ b/hail/python/pinned-requirements.txt @@ -57,11 +57,11 @@ azure-storage-blob==12.17.0 # -r hail/hail/python/hailtop/requirements.txt bokeh==3.2.2 # via -r hail/hail/python/requirements.txt -boto3==1.28.39 +boto3==1.28.41 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -botocore==1.31.39 +botocore==1.31.41 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt @@ -286,7 +286,7 @@ python-json-logger==2.0.7 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -pytz==2023.3 +pytz==2023.3.post1 # via pandas pyyaml==6.0.1 # via From 8586275ec4775e4302420c7051796b45b8413b21 Mon Sep 17 00:00:00 2001 From: jigold Date: Wed, 6 Sep 2023 23:27:13 -0400 Subject: [PATCH 10/15] [batch] Allow hailgenetics/vep images to be public (#13565) --- batch/batch/publicly_available_images.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/batch/batch/publicly_available_images.py b/batch/batch/publicly_available_images.py index 25a3c09f74f..134f70901a5 100644 --- a/batch/batch/publicly_available_images.py +++ b/batch/batch/publicly_available_images.py @@ -1,13 +1,7 @@ from typing import List +from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES + def publicly_available_images(docker_prefix: str) -> List[str]: - return [ - f'{docker_prefix}/hailgenetics/{name}' - for name in ( - 'hail', - 'hailtop', - 'genetics', - 'python-dill', - ) - ] + return [docker_prefix + '/' + image_name for image_name in HAIL_GENETICS_IMAGES] From 9cd874c3def42a7142b78697cdc2032bc43bf906 Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 7 Sep 2023 01:20:57 -0400 Subject: [PATCH 11/15] [security] update scipy pin to 1.11.1 (#13571) Supersedes https://github.com/hail-is/hail/pull/13228 . Resolves [CVE-2023-25399](https://nvd.nist.gov/vuln/detail/CVE-2023-25399). --- hail/python/pinned-requirements.txt | 2 +- hail/python/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/pinned-requirements.txt b/hail/python/pinned-requirements.txt index de9bed19adf..c2da73a402f 100644 --- a/hail/python/pinned-requirements.txt +++ b/hail/python/pinned-requirements.txt @@ -322,7 +322,7 @@ s3transfer==0.6.2 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # boto3 -scipy==1.9.3 +scipy==1.11.2 # via -r hail/hail/python/requirements.txt six==1.16.0 # via diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt index 71be0275ec4..35f4a3edfcf 100644 --- a/hail/python/requirements.txt +++ b/hail/python/requirements.txt @@ -14,4 +14,4 @@ plotly>=5.5.0,<6 protobuf==3.20.2 pyspark>=3.3.0,<3.4 requests>=2.25.1,<3 -scipy>1.2,<1.10 +scipy>1.2,<1.12 From 7fb60084065171eee538587ed709653a1f1f1b3c Mon Sep 17 00:00:00 2001 From: Patrick Schultz Date: Thu, 7 Sep 2023 02:27:43 -0400 Subject: [PATCH 12/15] [query] fix transposed blanczos (#13552) --- hail/python/hail/methods/pca.py | 3 +- hail/python/test/hail/methods/test_pca.py | 42 +++++++++++++++++++++-- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/hail/python/hail/methods/pca.py b/hail/python/hail/methods/pca.py index 089d75ac0a8..7e91067e71a 100644 --- a/hail/python/hail/methods/pca.py +++ b/hail/python/hail/methods/pca.py @@ -601,7 +601,8 @@ def _blanczos_pca(A, k=10, compute_loadings=False, q_iterations=10, oversampling if oversampling_param is None: oversampling_param = k - U, S, V = _reduced_svd(A, k, compute_loadings, q_iterations, k + oversampling_param) + compute_U = (not transpose and compute_loadings) or (transpose and compute_scores) + U, S, V = _reduced_svd(A, k, compute_U, q_iterations, k + oversampling_param) info("blanczos_pca: SVD Complete. Computing conversion to PCs.") def numpy_to_rows_table(X, field_name): diff --git a/hail/python/test/hail/methods/test_pca.py b/hail/python/test/hail/methods/test_pca.py index 045510f68f8..aa6c39f95fd 100644 --- a/hail/python/test/hail/methods/test_pca.py +++ b/hail/python/test/hail/methods/test_pca.py @@ -139,12 +139,12 @@ def matrix_table_from_numpy(np_mat): @test_timeout(batch=5 * 60) def test_blanczos_T(): k, m, n = 10, 100, 200 - sigma = np.diag([spec1(i + 1, k) for i in range(m)]) + sigma = [spec1(i + 1, k) for i in range(m)] seed = 1025 np.random.seed(seed) U = np.linalg.qr(np.random.normal(0, 1, (m, m)))[0] V = np.linalg.qr(np.random.normal(0, 1, (n, m)))[0] - A = U @ sigma @ V.T + A = (U * sigma) @ V.T mt_A_T = matrix_table_from_numpy(A.T) eigenvalues, scores, loadings = hl._blanczos_pca(mt_A_T.ent, k=k, oversampling_param=k, q_iterations=4, compute_loadings=True, transpose=True) @@ -154,7 +154,43 @@ def test_blanczos_T(): approx_A = hail_U @ np.diag(singulars) @ hail_V norm_of_diff = np.linalg.norm(A - approx_A, 2) np.testing.assert_allclose(norm_of_diff, spec1(k + 1, k), rtol=1e-02) - np.testing.assert_allclose(singulars, np.diag(sigma)[:k], rtol=1e-01) + np.testing.assert_allclose(singulars, sigma[:k], rtol=1e-01) + +@skip_when_service_backend() +def test_blanczos_flags(): + k, m, n = 10, 100, 200 + sigma = [spec1(i + 1, k) for i in range(m)] + seed = 1025 + np.random.seed(seed) + U = np.linalg.qr(np.random.normal(0, 1, (m, m)))[0] + V = np.linalg.qr(np.random.normal(0, 1, (n, m)))[0] + A = (U * sigma) @ V.T + mt_A = matrix_table_from_numpy(A) + mt_A_T = matrix_table_from_numpy(A.T) + # compare absolute values to account for +-1 indeterminacy factor in singular vectors + U = np.abs(U[:, :k]) + V = np.abs(V[:, :k]) + Usigma = U * sigma[:k] + Vsigma = V * sigma[:k] + + for compute_loadings in [True, False]: + for compute_scores in [True, False]: + for transpose in [True, False]: + mt = mt_A_T if transpose else mt_A + eigenvalues, scores, loadings = hl._blanczos_pca(mt.ent, k=k, oversampling_param=k, q_iterations=4, compute_loadings=compute_loadings, compute_scores=compute_scores, transpose=transpose) + if compute_loadings: + loadings = np.array(loadings.loadings.collect()) + np.testing.assert_allclose(np.abs(loadings), U, rtol=1e-02) + else: + assert loadings is None + if compute_scores: + scores = np.array(scores.scores.collect()) + np.testing.assert_allclose(np.abs(scores), Vsigma, rtol=1e-02) + else: + assert scores is None + singulars = np.sqrt(eigenvalues) + np.testing.assert_allclose(singulars, sigma[:k], rtol=1e-01) + def spectra_helper(spec_func, triplet): k, m, n = triplet From c160d3e2713f7484e11a9d92f5679450122a5b52 Mon Sep 17 00:00:00 2001 From: Patrick Schultz Date: Thu, 7 Sep 2023 13:43:35 -0400 Subject: [PATCH 13/15] [query] fix shadowing of field names by methods (#13498) In particular, struct field names which clash with methods on `StructExpression`. close #13495 CHANGELOG: Fix a bug where field names can shadow methods on the StructExpression class, e.g. "items", "keys", "values". Now the only way to access such fields is through the getitem syntax, e.g. "some_struct['items']". It's possible this could break existing code that uses such field names. --- .../hail/expr/aggregators/aggregators.py | 2 +- .../expr/expressions/typed_expressions.py | 27 ++++++++++++----- hail/python/hail/expr/functions.py | 6 ++-- hail/python/test/hail/expr/test_expr.py | 29 +++++++++++++++++++ 4 files changed, 52 insertions(+), 12 deletions(-) diff --git a/hail/python/hail/expr/aggregators/aggregators.py b/hail/python/hail/expr/aggregators/aggregators.py index 1366c6e23c6..836d7526649 100644 --- a/hail/python/hail/expr/aggregators/aggregators.py +++ b/hail/python/hail/expr/aggregators/aggregators.py @@ -339,7 +339,7 @@ def approx_cdf(expr, k=100): tfloat32: lambda x: x.map(hl.float32), tfloat64: identity } - return hl.struct(values=conv[expr.dtype](res.values), ranks=res.ranks, _compaction_counts=res._compaction_counts) + return hl.struct(values=conv[expr.dtype](res['values']), ranks=res.ranks, _compaction_counts=res._compaction_counts) @typecheck(expr=expr_numeric, qs=expr_oneof(expr_numeric, expr_array(expr_numeric)), k=int) diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index bf78d9b6e3e..9b9a9351e2f 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -1743,16 +1743,18 @@ def _from_fields(cls, fields: 'Dict[str, Expression]'): x = ir.MakeStruct([(n, expr._ir) for (n, expr) in fields.items()]) indices, aggregations = unify_all(*fields.values()) s = StructExpression.__new__(cls) + super(StructExpression, s).__init__(x, t, indices, aggregations) + s._warn_on_shadowed_name = set() s._fields = {} for k, v in fields.items(): s._set_field(k, v) - super(StructExpression, s).__init__(x, t, indices, aggregations) return s @typecheck_method(x=ir.IR, type=HailType, indices=Indices, aggregations=LinkedList) def __init__(self, x, type, indices=Indices(), aggregations=LinkedList(Aggregation)): super(StructExpression, self).__init__(x, type, indices, aggregations) self._fields: Dict[str, Expression] = {} + self._warn_on_shadowed_name = set() for i, (f, t) in enumerate(self.dtype.items()): if isinstance(self._ir, ir.MakeStruct): @@ -1778,9 +1780,14 @@ def __init__(self, x, type, indices=Indices(), aggregations=LinkedList(Aggregati self._set_field(f, expr) def _set_field(self, key, value): - self._fields[key] = value - if key not in self.__dict__: - self.__dict__[key] = value + if key not in self._fields: + # Avoid using hasattr on self. Each new field added will fall through to __getattr__, + # which has to build a nice error message. + if key in self.__dict__ or hasattr(super(), key): + self._warn_on_shadowed_name.add(key) + else: + self.__dict__[key] = value + self._fields[key] = value def _get_field(self, item): if item in self._fields: @@ -1788,11 +1795,15 @@ def _get_field(self, item): else: raise KeyError(get_nice_field_error(self, item)) + def __getattribute__(self, item): + if item in super().__getattribute__('_warn_on_shadowed_name'): + warning(f'Field {item} is shadowed by another method or attribute. ' + f'Use ["{item}"] syntax to access the field.') + self._warn_on_shadowed_name.remove(item) + return super().__getattribute__(item) + def __getattr__(self, item): - if item in self.__dict__: - return self.__dict__[item] - else: - raise AttributeError(get_nice_attr_error(self, item)) + raise AttributeError(get_nice_attr_error(self, item)) def __len__(self): return len(self._fields) diff --git a/hail/python/hail/expr/functions.py b/hail/python/hail/expr/functions.py index 938a4a6ec6c..a7d4de4ab2a 100644 --- a/hail/python/hail/expr/functions.py +++ b/hail/python/hail/expr/functions.py @@ -79,10 +79,10 @@ def _quantile_from_cdf(cdf, q): def compute(cdf): n = cdf.ranks[cdf.ranks.length() - 1] pos = hl.int64(q * n) + 1 - idx = hl.max(0, hl.min(cdf.values.length() - 1, _lower_bound(cdf.ranks, pos) - 1)) + idx = hl.max(0, hl.min(cdf['values'].length() - 1, _lower_bound(cdf.ranks, pos) - 1)) res = hl.if_else(n == 0, - hl.missing(cdf.values.dtype.element_type), - cdf.values[idx]) + hl.missing(cdf['values'].dtype.element_type), + cdf['values'][idx]) return res return hl.rbind(cdf, compute) diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index 852cf064d56..2bf89c8c47f 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -1404,6 +1404,35 @@ def assert_typed(expr, result, dtype): hl.Struct(f1=1, f2=2, f3=3), tstruct(f1=tint32, f2=tint32, f3=tint32)) + def test_shadowed_struct_fields(self): + from typing import Callable + + s = hl.struct(foo=1, values=2, collect=3, _ir=4) + assert 'foo' not in s._warn_on_shadowed_name + assert isinstance(s.foo, hl.Expression) + assert 'values' in s._warn_on_shadowed_name + assert isinstance(s.values, Callable) + assert 'values' not in s._warn_on_shadowed_name + assert 'collect' in s._warn_on_shadowed_name + assert isinstance(s.collect, Callable) + assert 'collect' not in s._warn_on_shadowed_name + assert '_ir' in s._warn_on_shadowed_name + assert isinstance(s._ir, ir.IR) + assert '_ir' not in s._warn_on_shadowed_name + + s = hl.StructExpression._from_fields({'foo': hl.int(1), 'values': hl.int(2), 'collect': hl.int(3), '_ir': hl.int(4)}) + assert 'foo' not in s._warn_on_shadowed_name + assert isinstance(s.foo, hl.Expression) + assert 'values' in s._warn_on_shadowed_name + assert isinstance(s.values, Callable) + assert 'values' not in s._warn_on_shadowed_name + assert 'collect' in s._warn_on_shadowed_name + assert isinstance(s.collect, Callable) + assert 'collect' not in s._warn_on_shadowed_name + assert '_ir' in s._warn_on_shadowed_name + assert isinstance(s._ir, ir.IR) + assert '_ir' not in s._warn_on_shadowed_name + def test_iter(self): a = hl.literal([1, 2, 3]) self.assertRaises(hl.expr.ExpressionException, lambda: hl.eval(list(a))) From be9d88a80695b04a2a9eb5826361e0897d94c042 Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Thu, 7 Sep 2023 14:54:48 -0400 Subject: [PATCH 14/15] [hailtop] Fix defaulting to hail.is when the user has no deploy-config (#13585) or domain specified. The key bug here was a check for `'domain' not in config` instead of `config['domain'] is None` --- hail/Makefile | 2 +- hail/python/hail/docs/change_log.md | 9 +++++++++ hail/python/hailtop/batch/docs/change_log.rst | 5 +++++ hail/python/hailtop/config/deploy_config.py | 4 +--- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/hail/Makefile b/hail/Makefile index 3eeb4f1667c..1a93927f1f6 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -14,7 +14,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.12.15 SPARK_VERSION ?= 3.3.0 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 121 +HAIL_PATCH_VERSION := 122 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 7 diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index ad76c6fe136..380e0832441 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -52,6 +52,15 @@ supports. policy. Their functionality or even existence may change without notice. Please contact us if you critically depend on experimental functionality.** +## Version 0.2.122 + +Released 2023-09-07 + +### Bug Fixes +- (hail#13585) Fix bug introduced in 0.2.121 where Query-on-Batch + users could not make requests to `batch.hail.is` without a domain configuration + set. + ## Version 0.2.121 Released 2023-08-31 diff --git a/hail/python/hailtop/batch/docs/change_log.rst b/hail/python/hailtop/batch/docs/change_log.rst index c15e9a8ec4d..37cee3d255a 100644 --- a/hail/python/hailtop/batch/docs/change_log.rst +++ b/hail/python/hailtop/batch/docs/change_log.rst @@ -15,6 +15,11 @@ versions. In particular, Hail officially supports: Change Log ========== +**Version 0.2.122** + +- (`#13565 `__) Users can now use VEP images from the `hailgenetics` DockerHub + in Hail Batch. + **Version 0.2.121** - (`#13396 `__) Non-spot instances can be requested via the :meth:`.Job.spot` method. diff --git a/hail/python/hailtop/config/deploy_config.py b/hail/python/hailtop/config/deploy_config.py index bd3eb4c139c..5f6dee9de1b 100644 --- a/hail/python/hailtop/config/deploy_config.py +++ b/hail/python/hailtop/config/deploy_config.py @@ -16,8 +16,6 @@ def env_var_or_default(name: str, defaults: Dict[str, str]) -> str: class DeployConfig: @staticmethod def from_config(config: Dict[str, str]) -> 'DeployConfig': - if 'domain' not in config: - config['domain'] = 'hail.is' return DeployConfig( env_var_or_default('location', config), env_var_or_default('default_namespace', config), @@ -48,7 +46,7 @@ def from_config_file(config_file=None) -> 'DeployConfig': config = { 'location': 'external', 'default_namespace': 'default', - 'domain': get_user_config().get('global', 'domain', fallback=None), + 'domain': get_user_config().get('global', 'domain', fallback='hail.is'), } return DeployConfig.from_config(config) From a56649fbdd039f3ad92e70b4b14654f332e6a646 Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Wed, 13 Sep 2023 08:19:18 +1000 Subject: [PATCH 15/15] TF: Replace google_storage_bucket. with module. --- infra/gcp/main.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index 04606ca5f7b..bce6774b1a0 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -477,7 +477,7 @@ module "testns_batch_gsa_secret" { } resource "google_storage_bucket_iam_member" "testns_batch_bucket_admin" { - bucket = google_storage_bucket.hail_test_gcs_bucket.name + bucket = module.hail_test_gcs_bucket.name role = "roles/storage.admin" member = "serviceAccount:${module.testns_batch_gsa_secret.email}" } @@ -495,7 +495,7 @@ module "testns_ci_gsa_secret" { } resource "google_storage_bucket_iam_member" "testns_ci_bucket_admin" { - bucket = google_storage_bucket.hail_test_gcs_bucket.name + bucket = module.hail_test_gcs_bucket.name role = "roles/storage.admin" member = "serviceAccount:${module.testns_ci_gsa_secret.email}" } @@ -594,7 +594,7 @@ module "test_dev_gsa_secret" { } resource "google_storage_bucket_iam_member" "test_dev_bucket_admin" { - bucket = google_storage_bucket.hail_test_gcs_bucket.name + bucket = module.hail_test_gcs_bucket.name role = "roles/storage.admin" member = "serviceAccount:${module.test_dev_gsa_secret.email}" } @@ -615,7 +615,7 @@ module "testns_test_dev_gsa_secret" { } resource "google_storage_bucket_iam_member" "testns_test_dev_bucket_admin" { - bucket = google_storage_bucket.hail_test_gcs_bucket.name + bucket = module.hail_test_gcs_bucket.name role = "roles/storage.admin" member = "serviceAccount:${module.testns_test_dev_gsa_secret.email}" }