- {{ batch['id'] }} |
+ {{ batch['id'] }} |
+ {{ batch['user'] }} |
+ {{ batch['billing_project'] }} |
{% if 'attributes' in batch and 'name' in batch['attributes'] and batch['attributes']['name'] is not none %}
{{ batch['attributes']['name'] }}
diff --git a/batch/batch/front_end/templates/job.html b/batch/batch/front_end/templates/job.html
index 2d66b461303..f9e8ec79ddb 100644
--- a/batch/batch/front_end/templates/job.html
+++ b/batch/batch/front_end/templates/job.html
@@ -3,6 +3,18 @@
{% block content %}
Batch {{ batch_id }} Job {{ job_id }}
+Properties
+
+ - Batch ID: {{ batch_id }}
+ - Job ID: {{ job_id }}
+ - User: {{ job['user'] }}
+ - Billing Project: {{ job['billing_project'] }}
+ - State: {{ job['state'] }}
+ - Exit Code: {% if 'exit_code' in job and job['exit_code'] is not none %}{{ job['exit_code'] }}{% endif %}
+ - Duration: {% if 'duration' in job and job['duration'] is not none %}{{ job['duration'] }}{% endif %}
+ - Cost: {% if 'cost' in job and job['cost'] is not none %}{{ job['cost'] }}{% endif %}
+
+
Attempts
{% if attempts %}
diff --git a/batch/test/test_dag.py b/batch/test/test_dag.py
index 3adae54d1c6..42c8d286604 100644
--- a/batch/test/test_dag.py
+++ b/batch/test/test_dag.py
@@ -156,6 +156,7 @@ def test():
callback_body.pop('duration')
assert (callback_body == {
'id': b.id,
+ 'user': 'test',
'billing_project': 'test',
'token': token,
'state': 'success',
diff --git a/benchmark-service/Dockerfile.test b/benchmark-service/Dockerfile.test
index 8629c8d2b96..f739af15f07 100644
--- a/benchmark-service/Dockerfile.test
+++ b/benchmark-service/Dockerfile.test
@@ -1,6 +1,3 @@
FROM {{ service_base_image.image }}
COPY benchmark-service/test/ /test/
-RUN python3 -m pip install --no-cache-dir \
- pytest-instafail==0.4.1 \
- pytest-asyncio==0.10.0
diff --git a/benchmark-service/test/test_update_commits.py b/benchmark-service/test/test_update_commits.py
index 5ab7ecb8dfe..39b904d95ae 100644
--- a/benchmark-service/test/test_update_commits.py
+++ b/benchmark-service/test/test_update_commits.py
@@ -9,14 +9,13 @@
from hailtop.httpx import client_session
import hailtop.utils as utils
-pytestmark = pytest.mark.asyncio
-
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
sha = 'd626f793ad700c45a878d192652a0378818bbd8b'
+@pytest.mark.asyncio
async def test_update_commits():
deploy_config = get_deploy_config()
headers = service_auth_headers(deploy_config, 'benchmark')
diff --git a/build.yaml b/build.yaml
index 43ecc016348..c884d9d830f 100644
--- a/build.yaml
+++ b/build.yaml
@@ -684,26 +684,6 @@ steps:
to: /cluster-tests.tar.gz
dependsOn:
- hail_build_image
- - kind: runImage
- name: build_hail_spark3
- image:
- valueFrom: hail_build_image.image
- resources:
- memory: "7.5G"
- cpu: "4"
- script: |
- set -ex
- cd /
- rm -rf repo
- mkdir repo
- cd repo
- {{ code.checkout_script }}
- cd hail
- time retry ./gradlew --version
- export SPARK_VERSION="3.0.1" SCALA_VERSION="2.12.12"
- time retry make jars python-version-info wheel
- dependsOn:
- - hail_build_image
- kind: buildImage
name: batch_worker_image
dockerFile: batch/Dockerfile.worker
@@ -2830,6 +2810,8 @@ steps:
mkdir -p ./ci/test ./hail/python
cp /repo/hail/ci/test/resources/build.yaml ./
cp -R /repo/hail/ci/test/resources ./ci/test/
+ cp /repo/hail/tls/Dockerfile ./ci/test/resources/Dockerfile.certs
+ cp /repo/hail/tls/create_certs.py ./ci/test/resources/
cp /repo/hail/pylintrc ./
cp /repo/hail/setup.cfg ./
cp -R /repo/hail/docker ./
@@ -3289,7 +3271,7 @@ steps:
script: |
set -ex
gcloud auth activate-service-account --key-file=/secrets/ci-deploy-0-1--hail-is-hail.json
- SPARK_VERSION=2.4.5
+ SPARK_VERSION=3.1.1
BRANCH=0.2
SHA="{{ code.sha }}"
GS_JAR=gs://hail-common/builds/${BRANCH}/jars/hail-${BRANCH}-${SHA}-Spark-${SPARK_VERSION}.jar
diff --git a/ci/Dockerfile.test b/ci/Dockerfile.test
index 4f2dfbe8150..15ea0b73172 100644
--- a/ci/Dockerfile.test
+++ b/ci/Dockerfile.test
@@ -4,4 +4,3 @@ COPY hail/python/setup-hailtop.py /hailtop/setup.py
COPY hail/python/hailtop /hailtop/hailtop/
RUN hail-pip-install /hailtop && rm -rf /hailtop
COPY ci/test/ /test/
-RUN hail-pip-install pytest-instafail==0.4.1 pytest-asyncio==0.10.0
diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml
index 7078e450185..93bbee7ec08 100644
--- a/ci/test/resources/build.yaml
+++ b/ci/test/resources/build.yaml
@@ -52,6 +52,36 @@ steps:
publishAs: service-base
dependsOn:
- base_image
+ - kind: buildImage
+ name: create_certs_image
+ dockerFile: ci/test/resources/Dockerfile.certs
+ contextPath: ci/test/resources
+ publishAs: test_hello_create_certs_image
+ dependsOn:
+ - service_base_image
+ - kind: runImage
+ name: create_certs
+ image:
+ valueFrom: create_certs_image.image
+ script: |
+ set -ex
+ python3 create_certs.py \
+ {{ default_ns.name }} \
+ config.yaml \
+ /ssl-config-hail-root/hail-root-key.pem \
+ /ssl-config-hail-root/hail-root-cert.pem
+ serviceAccount:
+ name: admin
+ namespace:
+ valueFrom: default_ns.name
+ secrets:
+ - name: ssl-config-hail-root
+ namespace:
+ valueFrom: default_ns.name
+ mountPath: /ssl-config-hail-root
+ dependsOn:
+ - default_ns
+ - create_certs_image
- kind: buildImage
name: hello_image
dockerFile: ci/test/resources/Dockerfile
diff --git a/ci/test/resources/config.yaml b/ci/test/resources/config.yaml
new file mode 100644
index 00000000000..5a67fd4191a
--- /dev/null
+++ b/ci/test/resources/config.yaml
@@ -0,0 +1,4 @@
+principals:
+- name: hello
+ domain: hello
+ kind: json
diff --git a/ci/test/resources/deployment.yaml b/ci/test/resources/deployment.yaml
index 4ef6e318c76..95c7dedee2d 100644
--- a/ci/test/resources/deployment.yaml
+++ b/ci/test/resources/deployment.yaml
@@ -50,6 +50,9 @@ spec:
- name: session-secret-key
mountPath: /session-secret-key
readOnly: true
+ - name: ssl-config
+ mountPath: /ssl-config
+ readOnly: true
env:
- name: HAIL_IP
valueFrom:
@@ -74,6 +77,10 @@ spec:
secret:
optional: false
secretName: session-secret-key
+ - name: ssl-config
+ secret:
+ optional: false
+ secretName: ssl-config-hello
---
apiVersion: v1
kind: Service
@@ -83,8 +90,7 @@ metadata:
app: hello
spec:
ports:
- - name: http
- port: 80
+ - port: 443
protocol: TCP
targetPort: 5000
selector:
diff --git a/ci/test/resources/hello.py b/ci/test/resources/hello.py
index 0e914e097e0..5bfc3fded0a 100644
--- a/ci/test/resources/hello.py
+++ b/ci/test/resources/hello.py
@@ -2,6 +2,7 @@
from aiohttp import web
from hailtop.config import get_deploy_config
+from hailtop.tls import internal_server_ssl_context
from gear import setup_aiohttp_session
@@ -14,15 +15,17 @@
@routes.get('/healthcheck')
-async def get_healthcheck(request): # pylint: disable=W0613
+async def get_healthcheck(request): # pylint: disable=unused-argument
return web.Response()
@routes.get('/sha')
-async def get_sha(request):
+async def get_sha(request): # pylint: disable=unused-argument
return web.Response(text=SHA)
setup_aiohttp_session(app)
app.add_routes(routes)
-web.run_app(deploy_config.prefix_application(app, 'hello'), host='0.0.0.0', port=5000)
+web.run_app(
+ deploy_config.prefix_application(app, 'hello'), host='0.0.0.0', port=5000, ssl_context=internal_server_ssl_context()
+)
diff --git a/ci/test/resources/statefulset.yaml b/ci/test/resources/statefulset.yaml
index 0bdd9a37796..1b9b9a9f2f9 100644
--- a/ci/test/resources/statefulset.yaml
+++ b/ci/test/resources/statefulset.yaml
@@ -49,6 +49,9 @@ spec:
- name: session-secret-key
mountPath: /session-secret-key
readOnly: true
+ - name: ssl-config
+ mountPath: /ssl-config
+ readOnly: true
env:
- name: HAIL_IP
value: "{{ global.ip }}"
@@ -67,6 +70,10 @@ spec:
secret:
optional: false
secretName: session-secret-key
+ - name: ssl-config
+ secret:
+ optional: false
+ secretName: ssl-config-hello
---
apiVersion: v1
kind: Service
@@ -76,8 +83,7 @@ metadata:
app: hello-stateful-set
spec:
ports:
- - name: http
- port: 80
+ - port: 443
protocol: TCP
targetPort: 5000
selector:
diff --git a/ci/test/test_ci.py b/ci/test/test_ci.py
index bd4be73cd7a..b712ffd1a87 100644
--- a/ci/test/test_ci.py
+++ b/ci/test/test_ci.py
@@ -9,12 +9,11 @@
from hailtop.httpx import client_session
import hailtop.utils as utils
-pytestmark = pytest.mark.asyncio
-
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
+@pytest.mark.asyncio
async def test_deploy():
deploy_config = get_deploy_config()
ci_deploy_status_url = deploy_config.url('ci', '/api/v1alpha/deploy_status')
diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base
index 1e899fab56c..11d470f54ee 100644
--- a/docker/Dockerfile.base
+++ b/docker/Dockerfile.base
@@ -26,7 +26,7 @@ RUN /bin/sh -c 'curl https://sdk.cloud.google.com | bash' && \
ENV PATH $PATH:/google-cloud-sdk/bin
COPY docker/requirements.txt .
-RUN hail-pip-install -r requirements.txt pyspark==2.4.0
+RUN hail-pip-install -r requirements.txt pyspark==3.1.1
ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark
ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin"
diff --git a/docker/Dockerfile.service-java-run-base b/docker/Dockerfile.service-java-run-base
index 4ca7f6dd789..c05ae40e118 100644
--- a/docker/Dockerfile.service-java-run-base
+++ b/docker/Dockerfile.service-java-run-base
@@ -8,7 +8,7 @@ RUN hail-apt-get-install \
liblapack3
COPY docker/requirements.txt .
-RUN hail-pip-install -r requirements.txt pyspark==2.4.0
+RUN hail-pip-install -r requirements.txt pyspark==3.1.1
ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark
ENV PYSPARK_PYTHON python3
diff --git a/docker/requirements.txt b/docker/requirements.txt
index 5b3c1ca3394..13d0ec46796 100644
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
@@ -24,7 +24,7 @@ google-cloud-logging==1.12.1
google-cloud-storage==1.25.0
humanize==1.0.0
hurry.filesize==0.9
-Jinja2==2.10.1
+Jinja2==2.11.3
# keyrings.alt>3.1: https://bugs.launchpad.net/usd-importer/+bug/1794041/comments/6
keyrings.alt>=3.1
kubernetes-asyncio==9.1.0
@@ -38,11 +38,11 @@ pyjwt==1.7.1
pylint==2.6.0
astroid<2.5 # https://github.com/PyCQA/pylint/issues/4131
PyMySQL==0.9.2
-pytest==4.6.3
-pytest-asyncio==0.10.0
+pytest==6.2.2
+pytest-asyncio==0.14.0
pytest-html==1.20.0
-pytest-instafail==0.4.1
-pytest-xdist==1.28
+pytest-instafail==0.4.2
+pytest-xdist==2.2.1
python-dateutil==2.8.1
python-json-logger==0.1.11
requests==2.22.0
diff --git a/gateway/gateway.nginx.conf b/gateway/gateway.nginx.conf
index b21f4d0cd13..3408533c769 100644
--- a/gateway/gateway.nginx.conf
+++ b/gateway/gateway.nginx.conf
@@ -17,11 +17,6 @@ server {
}
}
-map $maybe_router_scheme $router_scheme {
- default $maybe_router_scheme;
- '' http;
-}
-
server {
server_name internal.hail.populationgenomics.org.au;
client_max_body_size 8m;
@@ -29,7 +24,7 @@ server {
location = /auth {
internal;
resolver kube-dns.kube-system.svc.cluster.local;
- proxy_pass https://router-resolver.default.svc.cluster.local/auth/$namespace;
+ proxy_pass https://auth/api/v1alpha/verify_dev_credentials;
include /ssl-config/ssl-config-proxy.conf;
}
@@ -38,10 +33,9 @@ server {
set $service $2;
auth_request /auth;
- auth_request_set $router_ip $upstream_http_x_router_ip;
- auth_request_set $maybe_router_scheme $upstream_http_x_router_scheme;
- proxy_pass $router_scheme://$router_ip$request_uri;
+ resolver kube-dns.kube-system.svc.cluster.local;
+ proxy_pass https://$service.$namespace.svc.cluster.local;
proxy_set_header Host $service.internal;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
diff --git a/hail/Makefile b/hail/Makefile
index 36c57a99717..9596c2997fa 100644
--- a/hail/Makefile
+++ b/hail/Makefile
@@ -10,8 +10,8 @@ MAKEFLAGS += --no-builtin-rules
REVISION := $(shell git rev-parse HEAD)
SHORT_REVISION := $(shell git rev-parse --short=12 HEAD)
BRANCH := $(shell git rev-parse --abbrev-ref HEAD)
-SCALA_VERSION ?= 2.11.12
-SPARK_VERSION ?= 2.4.5
+SCALA_VERSION ?= 2.12.13
+SPARK_VERSION ?= 3.1.1
HAIL_MAJOR_MINOR_VERSION := 0.2
HAIL_PATCH_VERSION := 64
HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION)
@@ -59,12 +59,23 @@ SHADOW_TEST_JAR := build/libs/hail-all-spark-test.jar
PYTHON_JAR := python/hail/backend/hail-all-spark.jar
WHEEL := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3-none-any.whl
EGG := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3.6.egg
+ELASTICSEARCH_JAR := libs/elasticsearch-spark-30_2.12-8.0.0-SNAPSHOT-custom-hail-spark311.jar
GRADLE_ARGS += -Dscala.version=$(SCALA_VERSION) -Dspark.version=$(SPARK_VERSION) -Delasticsearch.major-version=$(ELASTIC_MAJOR_VERSION)
+.PHONY: elasticsearchJar
+elasticsearchJar: $(ELASTICSEARCH_JAR)
+
+$(ELASTICSEARCH_JAR):
+ @mkdir -p libs
+ gsutil cp gs://hail-common/elasticsearch-libs/elasticsearch-spark-30_2.12-8.0.0-SNAPSHOT-custom-hail-spark311.jar libs/
+
.PHONY: shadowJar
shadowJar: $(SHADOW_JAR)
+$(JAR_SOURCES): $(ELASTICSEARCH_JAR)
+$(JAR_TEST_SOURCES): $(ELASTICSEARCH_JAR)
+
ifdef HAIL_COMPILE_NATIVES
$(SHADOW_JAR): native-lib-prebuilt
endif
@@ -372,7 +383,10 @@ native-lib-prebuilt:
native-lib-reset-prebuilt:
$(MAKE) -C src/main/c reset-prebuilt
-clean: clean-env native-lib-clean
+clean-libs:
+ rm -rf libs
+
+clean: clean-env clean-libs native-lib-clean
$(MAKE) -C python/hail/docs clean
$(MAKE) -C python/hailtop/batch/docs clean
./gradlew clean $(GRADLE_ARGS)
diff --git a/hail/build.gradle b/hail/build.gradle
index 91202a23138..9a3593eb394 100644
--- a/hail/build.gradle
+++ b/hail/build.gradle
@@ -18,6 +18,9 @@ plugins {
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
repositories {
+ flatDir {
+ dirs 'libs'
+ }
mavenCentral()
jcenter()
maven { url "https://repository.cloudera.com/artifactory/cloudera-repos/" }
@@ -46,8 +49,17 @@ tasks.withType(JavaCompile) {
project.ext {
cachedBreezeVersion = null
- sparkVersion = System.getProperty("spark.version", "2.4.5")
- scalaVersion = System.getProperty("scala.version", "2.11.12")
+ sparkVersion = System.getProperty("spark.version", "3.1.1")
+ if (sparkVersion.startsWith("2.")) {
+ throw new UnsupportedOperationException("Hail no longer supports Spark 2.")
+ }
+ else if (sparkVersion != "3.1.1") {
+ project.logger.lifecycle("WARNING: Hail only tested with Spark 3.1.1, use other versions at your own risk.")
+ }
+ scalaVersion = System.getProperty("scala.version", "2.12.13")
+ if (!scalaVersion.startsWith("2.12.")) {
+ throw new UnsupportedOperationException("Hail currently only supports Scala 2.12")
+ }
scalaMajorVersion = (scalaVersion =~ /^\d+.\d+/)[0]
}
@@ -100,19 +112,6 @@ String breezeVersion() {
return cachedBreezeVersion
}
-String elasticHadoopVersion() {
- def elasticMajorVersion = System.getProperty("elasticsearch.major-version", "7")
- if (elasticMajorVersion == "6") {
- return "6.8.13"
- }
- else if (elasticMajorVersion == "7") {
- return "7.8.1"
- }
- else {
- throw new UnsupportedOperationException("elasticsearch.major-version must be 6 or 7")
- }
-}
-
configurations {
justSpark
@@ -126,9 +125,8 @@ configurations {
}
}
eachDependency { DependencyResolveDetails details ->
- if (details.requested.group == 'org.json4s') {
- // JSON4S 3.6.0+ contain a known bug (https://github.com/json4s/json4s/issues/507)
- details.useVersion('3.5.3')
+ if (details.requested.group == 'org.apache.spark') {
+ details.useVersion(sparkVersion)
} else if (details.requested.group == 'org.scalanlp' && details.requested.version == '1.0') {
// Breeze 1.0 contains a known bug (https://github.com/scalanlp/breeze/issues/772)
details.useVersion('1.1')
@@ -181,7 +179,12 @@ dependencies {
bundled group: 'org.slf4j', name: 'slf4j-api', version: '1.7.25'
- bundled 'org.elasticsearch:elasticsearch-spark-20_2.11:' + elasticHadoopVersion()
+ def elasticMajorVersion = System.getProperty("elasticsearch.major-version", "7")
+ if (elasticMajorVersion != "7") {
+ throw new UnsupportedOperationException("elasticsearch.major-version must be 7")
+ }
+ // This comes from a local libs directory, see Makefile
+ bundled 'org.elasticsearch:elasticsearch-spark-30_2.12-8.0.0-SNAPSHOT-custom-hail-spark311'
bundled 'com.google.cloud:google-cloud-storage:1.106.0'
@@ -310,7 +313,6 @@ tasks.withType(ShadowJar) {
// we should really shade indeed, but it has native libraries
// relocate 'com.indeed', 'is.hail.relocated.com.indeed'
relocate 'com.google.cloud', 'is.hail.relocated.com.google.cloud'
- relocate 'org.elasticsearch', 'is.hail.relocated.org.elasticsearch'
relocate 'com.github.samtools', 'is.hail.relocated.com.github.samtools'
relocate 'org.lz4', 'is.hail.relocated.org.lz4'
relocate 'org.freemarker', 'is.hail.relocated.org.freemarker'
diff --git a/hail/python/dev-requirements.txt b/hail/python/dev-requirements.txt
index 039b6b4412c..a2f706f9483 100644
--- a/hail/python/dev-requirements.txt
+++ b/hail/python/dev-requirements.txt
@@ -5,10 +5,10 @@ astroid<2.5 # https://github.com/PyCQA/pylint/issues/4131
pre-commit==2.9.2
black==20.8b1
curlylint==0.12.0
-pytest==4.6.3
+pytest==6.2.2
pytest-html==1.20.0
-pytest-xdist==1.28
-pytest-instafail==0.4.1
+pytest-xdist==2.2.1
+pytest-instafail==0.4.2
sphinx==3.2.1
sphinx-autodoc-typehints==1.11.0
nbsphinx==0.7.1
diff --git a/hail/python/hail/backend/local_backend.py b/hail/python/hail/backend/local_backend.py
index c246b401498..6ab7220e6ed 100644
--- a/hail/python/hail/backend/local_backend.py
+++ b/hail/python/hail/backend/local_backend.py
@@ -126,7 +126,7 @@ def __init__(self, tmpdir, log, quiet, append, branching_factor,
port = launch_gateway(
redirect_stdout=sys.stdout,
redirect_stderr=sys.stderr,
- jarpath=f'{spark_home}/jars/py4j-0.10.7.jar',
+ jarpath=f'{spark_home}/jars/py4j-0.10.9.jar',
classpath=f'{spark_home}/jars/*:{hail_jar_path}',
die_on_exit=True)
self._gateway = JavaGateway(
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AFR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AFR.rst
new file mode 100644
index 00000000000..a308200981c
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AFR.rst
@@ -0,0 +1,28 @@
+.. _panukb_ld_scores_AFR:
+
+panukb_ld_scores_AFR
+====================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ None
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'rsid': str
+ 'varid': str
+ 'AF': float64
+ 'ld_score': float64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AMR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AMR.rst
new file mode 100644
index 00000000000..759c06d15ff
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AMR.rst
@@ -0,0 +1,28 @@
+.. _panukb_ld_scores_AMR:
+
+panukb_ld_scores_AMR
+====================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ None
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'rsid': str
+ 'varid': str
+ 'AF': float64
+ 'ld_score': float64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_CSA.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_CSA.rst
new file mode 100644
index 00000000000..c0d11c300bf
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_CSA.rst
@@ -0,0 +1,28 @@
+.. _panukb_ld_scores_CSA:
+
+panukb_ld_scores_CSA
+====================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ None
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'rsid': str
+ 'varid': str
+ 'AF': float64
+ 'ld_score': float64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EAS.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EAS.rst
new file mode 100644
index 00000000000..c036bb98686
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EAS.rst
@@ -0,0 +1,28 @@
+.. _panukb_ld_scores_EAS:
+
+panukb_ld_scores_EAS
+====================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ None
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'rsid': str
+ 'varid': str
+ 'AF': float64
+ 'ld_score': float64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EUR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EUR.rst
new file mode 100644
index 00000000000..3697ceb561e
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EUR.rst
@@ -0,0 +1,28 @@
+.. _panukb_ld_scores_EUR:
+
+panukb_ld_scores_EUR
+====================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ None
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'rsid': str
+ 'varid': str
+ 'AF': float64
+ 'ld_score': float64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_MID.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_MID.rst
new file mode 100644
index 00000000000..de24078b6cd
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_MID.rst
@@ -0,0 +1,28 @@
+.. _panukb_ld_scores_MID:
+
+panukb_ld_scores_MID
+====================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ None
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'rsid': str
+ 'varid': str
+ 'AF': float64
+ 'ld_score': float64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AFR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AFR.rst
new file mode 100644
index 00000000000..b82c3826cf1
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AFR.rst
@@ -0,0 +1,26 @@
+.. _panukb_ld_variant_indices_AFR:
+
+panukb_ld_variant_indices_AFR
+=============================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ 'n_samples': int32
+ 'pop': str
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'idx': int64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AMR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AMR.rst
new file mode 100644
index 00000000000..09445a9e971
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AMR.rst
@@ -0,0 +1,26 @@
+.. _panukb_ld_variant_indices_AMR:
+
+panukb_ld_variant_indices_AMR
+=============================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ 'n_samples': int32
+ 'pop': str
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'idx': int64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_CSA.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_CSA.rst
new file mode 100644
index 00000000000..7c0d9e8c8ca
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_CSA.rst
@@ -0,0 +1,26 @@
+.. _panukb_ld_variant_indices_CSA:
+
+panukb_ld_variant_indices_CSA
+=============================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ 'n_samples': int32
+ 'pop': str
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'idx': int64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EAS.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EAS.rst
new file mode 100644
index 00000000000..583575bf111
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EAS.rst
@@ -0,0 +1,26 @@
+.. _panukb_ld_variant_indices_EAS:
+
+panukb_ld_variant_indices_EAS
+=============================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ 'n_samples': int32
+ 'pop': str
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'idx': int64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EUR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EUR.rst
new file mode 100644
index 00000000000..97c87a3cb93
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EUR.rst
@@ -0,0 +1,26 @@
+.. _panukb_ld_variant_indices_EUR:
+
+panukb_ld_variant_indices_EUR
+=============================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ 'n_samples': int32
+ 'pop': str
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'idx': int64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_MID.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_MID.rst
new file mode 100644
index 00000000000..b6a7969b6f1
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_MID.rst
@@ -0,0 +1,26 @@
+.. _panukb_ld_variant_indices_MID:
+
+panukb_ld_variant_indices_MID
+=============================
+
+* **Versions:** 0.2
+* **Reference genome builds:** GRCh37
+* **Type:** :class:`hail.Table`
+
+Schema (0.2, GRCh37)
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+ ----------------------------------------
+ Global fields:
+ 'n_samples': int32
+ 'pop': str
+ ----------------------------------------
+ Row fields:
+ 'locus': locus
+ 'alleles': array
+ 'idx': int64
+ ----------------------------------------
+ Key: ['locus', 'alleles']
+ ----------------------------------------
diff --git a/hail/python/hail/docs/install/linux.rst b/hail/python/hail/docs/install/linux.rst
index 66eb949ccc4..8af0649b4ae 100644
--- a/hail/python/hail/docs/install/linux.rst
+++ b/hail/python/hail/docs/install/linux.rst
@@ -3,7 +3,7 @@ Install Hail on GNU/Linux
=========================
- Install Java 8.
-- Install Python 3.6 or 3.7.
+- Install Python 3.6+.
- Install a recent version of the C and C++ standard libraries. GCC 5.0, LLVM
version 3.4, or any later versions suffice.
- Install BLAS and LAPACK.
diff --git a/hail/python/hail/docs/install/macosx.rst b/hail/python/hail/docs/install/macosx.rst
index 0a8cf198408..a826a4520d4 100644
--- a/hail/python/hail/docs/install/macosx.rst
+++ b/hail/python/hail/docs/install/macosx.rst
@@ -3,6 +3,6 @@ Install Hail on Mac OS X
========================
- Install `Java 8 `__.
-- Install Python 3.6 or 3.7. We recommend `Miniconda `__; however, the latest version of Miniconda installs Python 3.8 by default. Please follow `these instructions `__ to create a Python 3.6 or 3.7 environment.
+- Install Python 3.6+. We recommend `Miniconda `__.
- Open Terminal.app and execute ``pip install hail``.
- `Run your first Hail query! `__
diff --git a/hail/python/hail/docs/install/other-cluster.rst b/hail/python/hail/docs/install/other-cluster.rst
index b294e46bb1e..af7514b9114 100644
--- a/hail/python/hail/docs/install/other-cluster.rst
+++ b/hail/python/hail/docs/install/other-cluster.rst
@@ -5,13 +5,13 @@ Install Hail on a Spark Cluster
If you are using Google Dataproc, please see `these simpler instructions
`__.
-Hail should work with any Spark 2.4.x cluster built with Scala 2.11.
+Hail should work with any Spark 3.1.1 cluster built with Scala 2.12.
Hail needs to be built from source on the leader node. Building Hail from source
requires:
- Java 8 JDK.
-- Python 3.6 or 3.7.
+- Python 3.6+.
- A recent C and a C++ compiler, GCC 5.0, LLVM 3.4, or later versions of either
suffice.
- BLAS and LAPACK.
diff --git a/hail/python/hail/experimental/datasets.json b/hail/python/hail/experimental/datasets.json
index 59507dd4fdc..f6cac416040 100644
--- a/hail/python/hail/experimental/datasets.json
+++ b/hail/python/hail/experimental/datasets.json
@@ -1581,5 +1581,311 @@
"version": "1.1"
}
]
+ },
+ "panukb_ld_block_matrix_AFR": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for African ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_AFR.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AFR.ldadj.bm"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_block_matrix_AMR": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for Admixed American ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_AMR.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AMR.ldadj.bm"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_block_matrix_CSA": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for Central/South Asian ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_CSA.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.CSA.ldadj.bm"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_block_matrix_EAS": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for East Asian ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_EAS.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EAS.ldadj.bm"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_block_matrix_EUR": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for European ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_EUR.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EUR.ldadj.bm"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_block_matrix_MID": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for Middle Eastern ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_MID.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.MID.ldadj.bm"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_scores_AFR": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for African ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AFR.ldscore.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_scores_AMR": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for Admixed American ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AMR.ldscore.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_scores_CSA": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for Central/South Asian ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.CSA.ldscore.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_scores_EAS": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for East Asian ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EAS.ldscore.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_scores_EUR": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for European ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EUR.ldscore.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_scores_MID": {
+ "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for Middle Eastern ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.MID.ldscore.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_variant_indices_AFR": {
+ "description": "Pan-UKB: variant indices Hail Table for African ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AFR.ldadj.variant.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_variant_indices_AMR": {
+ "description": "Pan-UKB: variant indices Hail Table for Admixed American ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AMR.ldadj.variant.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_variant_indices_CSA": {
+ "description": "Pan-UKB: variant indices Hail Table for Central/South Asian ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.CSA.ldadj.variant.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_variant_indices_EAS": {
+ "description": "Pan-UKB: variant indices Hail Table for East Asian ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EAS.ldadj.variant.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_variant_indices_EUR": {
+ "description": "Pan-UKB: variant indices Hail Table for European ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EUR.ldadj.variant.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_ld_variant_indices_MID": {
+ "description": "Pan-UKB: variant indices Hail Table for Middle Eastern ancestry population.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.MID.ldadj.variant.ht"
+ }
+ },
+ "version": "0.2"
+ }
+ ]
+ },
+ "panukb_meta_analysis": {
+ "description": "Pan-UKB: pan-ancestry GWAS of UK Biobank, full meta-analysis Hail MatrixTable.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview/index.html",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/sumstats_release/meta_analysis.mt"
+ },
+ "gcp": {
+ "us": "gs://ukb-diverse-pops-public/sumstats_release/meta_analysis.mt"
+ }
+ },
+ "version": "0.1"
+ }
+ ]
+ },
+ "panukb_summary_stats": {
+ "description": "Pan-UKB: pan-ancestry GWAS of UK Biobank, full summary statistics Hail MatrixTable.",
+ "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview/index.html",
+ "versions": [
+ {
+ "reference_genome": "GRCh37",
+ "url": {
+ "aws": {
+ "us": "s3://pan-ukb-us-east-1/sumstats_release/results_full.mt"
+ },
+ "gcp": {
+ "us": "gs://ukb-diverse-pops-public/sumstats_release/results_full.mt"
+ }
+ },
+ "version": "0.1"
+ }
+ ]
}
}
diff --git a/hail/python/hail/experimental/table_ndarray_utils.py b/hail/python/hail/experimental/table_ndarray_utils.py
index e107db97520..7d9fcef5cef 100644
--- a/hail/python/hail/experimental/table_ndarray_utils.py
+++ b/hail/python/hail/experimental/table_ndarray_utils.py
@@ -3,7 +3,7 @@
from hail.utils.java import Env
-def mt_to_table_of_ndarray(entry_expr, block_size=16):
+def mt_to_table_of_ndarray(entry_expr, block_size=16, return_checkpointed_table_also=False):
check_entry_indexed('mt_to_table_of_ndarray/entry_expr', entry_expr)
mt = matrix_table_source('mt_to_table_of_ndarray/entry_expr', entry_expr)
@@ -35,8 +35,11 @@ def get_even_partitioning(ht, partition_size, total_num_rows):
ht = ht.checkpoint(temp_file_name)
num_rows = ht.count()
new_partitioning = get_even_partitioning(ht, block_size, num_rows)
- ht = hl.read_table(temp_file_name, _intervals=new_partitioning)
+ new_part_ht = hl.read_table(temp_file_name, _intervals=new_partitioning)
- grouped = ht._group_within_partitions("groups", block_size)
+ grouped = new_part_ht._group_within_partitions("groups", block_size)
A = grouped.select(ndarray=hl.nd.array(grouped.groups.map(lambda group: group.xs)))
+
+ if return_checkpointed_table_also:
+ return A, ht
return A
diff --git a/hail/python/hail/linalg/blockmatrix.py b/hail/python/hail/linalg/blockmatrix.py
index 3190aa6ac8f..b7231575bfc 100644
--- a/hail/python/hail/linalg/blockmatrix.py
+++ b/hail/python/hail/linalg/blockmatrix.py
@@ -1375,6 +1375,8 @@ def __add__(self, b):
-------
:class:`.BlockMatrix`
"""
+ if isinstance(b, (int, float)):
+ return self._map_dense(lambda entry: entry + b)
return self._apply_map2(BlockMatrix._binary_op('+'), b, sparsity_strategy="Union")
@typecheck_method(b=oneof(numeric, np.ndarray, block_matrix_type))
@@ -1389,6 +1391,8 @@ def __sub__(self, b):
-------
:class:`.BlockMatrix`
"""
+ if isinstance(b, (int, float)):
+ return self._map_dense(lambda entry: entry - b)
return self._apply_map2(BlockMatrix._binary_op('-'), b, sparsity_strategy="Union")
@typecheck_method(b=oneof(numeric, np.ndarray, block_matrix_type))
@@ -1403,6 +1407,9 @@ def __mul__(self, b):
-------
:class:`.BlockMatrix`
"""
+ if isinstance(b, (int, float)):
+ # sparse since multiplying by zero is zero
+ return self._map_sparse(lambda entry: entry * b)
return self._apply_map2(BlockMatrix._binary_op('*'), b, sparsity_strategy="Intersection")
@typecheck_method(b=oneof(numeric, np.ndarray, block_matrix_type))
@@ -1417,6 +1424,9 @@ def __truediv__(self, b):
-------
:class:`.BlockMatrix`
"""
+ if isinstance(b, (int, float)):
+ # sparse since dividing by zero is zero
+ return self._map_sparse(lambda entry: entry / b)
return self._apply_map2(BlockMatrix._binary_op('/'), b, sparsity_strategy="NeedsDense")
@typecheck_method(b=numeric)
@@ -1528,6 +1538,12 @@ def __pow__(self, x):
"""
return self._apply_map(lambda i: i ** x, needs_dense=False)
+ def _map_dense(self, func):
+ return self._apply_map(func, True)
+
+ def _map_sparse(self, func):
+ return self._apply_map(func, False)
+
def sqrt(self):
"""Element-wise square root.
diff --git a/hail/python/hail/methods/pca.py b/hail/python/hail/methods/pca.py
index 53583baf123..db8f42769f4 100644
--- a/hail/python/hail/methods/pca.py
+++ b/hail/python/hail/methods/pca.py
@@ -300,10 +300,10 @@ def _blanczos_pca(entry_expr, k=10, compute_loadings=False, q_iterations=2, over
(:obj:`list` of :obj:`float`, :class:`.Table`, :class:`.Table`)
List of eigenvalues, table with column scores, table with row loadings.
"""
-
+ check_entry_indexed('mt_to_table_of_ndarray/entry_expr', entry_expr)
mt = matrix_table_source('pca/entry_expr', entry_expr)
- A = mt_to_table_of_ndarray(entry_expr, block_size)
+ A, ht = mt_to_table_of_ndarray(entry_expr, block_size, return_checkpointed_table_also=True)
A = A.persist()
# Set Parameters
@@ -365,10 +365,12 @@ def hailBlanczos(A, G, k, q):
cols_and_scores = hl.zip(A.index_globals().cols, hail_array_scores).map(lambda tup: tup[0].annotate(scores=tup[1]))
st = hl.Table.parallelize(cols_and_scores, key=list(mt.col_key))
- lt = mt.rows().select()
+ lt = ht.select()
lt = lt.annotate_globals(U=U)
- lt = lt.add_index()
- lt = lt.annotate(loadings=lt.U[lt.idx, :]._data_array()).select_globals()
+ idx_name = '_tmp_pca_loading_index'
+ lt = lt.add_index(idx_name)
+ lt = lt.annotate(loadings=lt.U[lt[idx_name], :]._data_array()).select_globals()
+ lt = lt.drop(lt[idx_name])
if compute_loadings:
return eigens, st, lt
diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py
index 976cebd9ebf..0eb986ecbdf 100644
--- a/hail/python/hailtop/batch_client/aioclient.py
+++ b/hail/python/hailtop/batch_client/aioclient.py
@@ -203,6 +203,8 @@ async def is_complete(self):
# {
# batch_id: int
# job_id: int
+ # user: str
+ # billing_project: str
# name: optional(str)
# state: str (Ready, Running, Success, Error, Failure, Cancelled)
# exit_code: optional(int)
@@ -349,7 +351,8 @@ async def get_job_log(self, job_id: int) -> Optional[Dict[str, Any]]:
return await self._client.get_job_log(self.id, job_id)
# {
- # id: int,
+ # id: int
+ # user: str
# billing_project: str
# token: str
# state: str, (open, failure, cancelled, success, running)
diff --git a/hail/python/hailtop/batch_client/client.py b/hail/python/hailtop/batch_client/client.py
index 530fe1592d2..d4a67d51839 100644
--- a/hail/python/hailtop/batch_client/client.py
+++ b/hail/python/hailtop/batch_client/client.py
@@ -82,6 +82,8 @@ def is_complete(self):
# {
# batch_id: int
# job_id: int
+ # user: str
+ # billing_project: str
# name: optional(str)
# state: str (Ready, Running, Success, Error, Failure, Cancelled)
# exit_code: optional(int)
@@ -128,7 +130,8 @@ def cancel(self):
async_to_blocking(self._async_batch.cancel())
# {
- # id: int,
+ # id: int
+ # user: str
# billing_project: str
# token: str
# state: str, (open, failure, cancelled, success, running)
diff --git a/hail/python/hailtop/hailctl/dataproc/start.py b/hail/python/hailtop/hailctl/dataproc/start.py
index b8aff6137fb..81e341f1e2f 100755
--- a/hail/python/hailtop/hailctl/dataproc/start.py
+++ b/hail/python/hailtop/hailctl/dataproc/start.py
@@ -136,7 +136,7 @@
ANNOTATION_DB_BUCKETS = ["hail-datasets-us", "hail-datasets-eu", "gnomad-public-requester-pays"]
-IMAGE_VERSION = '1.4-debian9'
+IMAGE_VERSION = '2.0.6-debian10'
def init_parser(parser):
diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt
index c3909bbd60f..86353224136 100644
--- a/hail/python/requirements.txt
+++ b/hail/python/requirements.txt
@@ -13,7 +13,7 @@ numpy<2
pandas>=1.1.0,<1.1.5
parsimonious<0.9
PyJWT
-pyspark>=2.4,<2.4.2
+pyspark>=3.1.1,<3.2.0
python-json-logger==0.1.11
requests==2.22.0
scipy>1.2,<1.7
diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py
index f4f7743bdc4..715d3e6b4cb 100644
--- a/hail/python/test/hail/expr/test_ndarrays.py
+++ b/hail/python/test/hail/expr/test_ndarrays.py
@@ -59,7 +59,7 @@ def test_ndarray_ref():
with pytest.raises(HailUserError) as exc:
hl.eval(hl.nd.array([1, 2, 3])[4])
- assert "Index 4 is out of bounds for axis 0 with size 3" in str(exc)
+ assert "Index 4 is out of bounds for axis 0 with size 3" in str(exc.value)
@fails_service_backend()
@@ -295,31 +295,31 @@ def test_ndarray_reshape():
with pytest.raises(FatalError) as exc:
hl.eval(hl.literal(np_cube).reshape((-1, -1)))
- assert "more than one -1" in str(exc)
+ assert "more than one -1" in str(exc.value)
with pytest.raises(FatalError) as exc:
hl.eval(hl.literal(np_cube).reshape((20,)))
- assert "requested shape is incompatible with number of elements" in str(exc)
+ assert "requested shape is incompatible with number of elements" in str(exc.value)
with pytest.raises(FatalError) as exc:
hl.eval(a.reshape((3,)))
- assert "requested shape is incompatible with number of elements" in str(exc)
+ assert "requested shape is incompatible with number of elements" in str(exc.value)
with pytest.raises(FatalError) as exc:
hl.eval(a.reshape(()))
- assert "requested shape is incompatible with number of elements" in str(exc)
+ assert "requested shape is incompatible with number of elements" in str(exc.value)
with pytest.raises(FatalError) as exc:
hl.eval(hl.literal(np_cube).reshape((0, 2, 2)))
- assert "requested shape is incompatible with number of elements" in str(exc)
+ assert "requested shape is incompatible with number of elements" in str(exc.value)
with pytest.raises(FatalError) as exc:
hl.eval(hl.literal(np_cube).reshape((2, 2, -2)))
- assert "must contain only nonnegative numbers or -1" in str(exc)
+ assert "must contain only nonnegative numbers or -1" in str(exc.value)
with pytest.raises(FatalError) as exc:
hl.eval(shape_zero.reshape((0, -1)))
- assert "Can't reshape" in str(exc)
+ assert "Can't reshape" in str(exc.value)
with pytest.raises(TypeError):
a.reshape(hl.tuple(['4', '5']))
@@ -616,11 +616,11 @@ def test_ndarray_matmul():
with pytest.raises(FatalError) as exc:
hl.eval(r @ r)
- assert "Matrix dimensions incompatible: 3 2" in str(exc)
+ assert "Matrix dimensions incompatible: 3 2" in str(exc.value)
with pytest.raises(FatalError) as exc:
hl.eval(hl.nd.array([1, 2]) @ hl.nd.array([1, 2, 3]))
- assert "Matrix dimensions incompatible" in str(exc)
+ assert "Matrix dimensions incompatible" in str(exc.value)
def test_ndarray_big():
@@ -651,7 +651,7 @@ def test_ndarray_arange():
with pytest.raises(FatalError) as exc:
hl.eval(hl.nd.arange(5, 20, 0))
- assert "Array range cannot have step size 0" in str(exc)
+ assert "Array range cannot have step size 0" in str(exc.value)
def test_ndarray_mixed():
@@ -680,7 +680,7 @@ def test_ndarray_diagonal():
with pytest.raises(AssertionError) as exc:
hl.nd.diagonal(hl.nd.array([1, 2]))
- assert "2 dimensional" in str(exc)
+ assert "2 dimensional" in str(exc.value)
def test_ndarray_qr():
@@ -782,11 +782,11 @@ def assert_same_qr(hl_ndarray, np_ndarray):
with pytest.raises(ValueError) as exc:
hl.nd.qr(wiki_example, mode="invalid")
- assert "Unrecognized mode" in str(exc)
+ assert "Unrecognized mode" in str(exc.value)
with pytest.raises(AssertionError) as exc:
hl.nd.qr(hl.nd.arange(6))
- assert "requires 2 dimensional" in str(exc)
+ assert "requires 2 dimensional" in str(exc.value)
def test_svd():
@@ -1053,4 +1053,4 @@ def test_agg_ndarray_sum():
mismatched = hl.utils.range_table(5)
mismatched = mismatched.annotate(x=hl.nd.ones((mismatched.idx,)))
mismatched.aggregate(hl.agg.ndarray_sum(mismatched.x))
- assert "Can't sum" in str(exc)
+ assert "Can't sum" in str(exc.value)
diff --git a/hail/python/test/hail/linalg/test_linalg.py b/hail/python/test/hail/linalg/test_linalg.py
index 02d56855b30..289b284e44c 100644
--- a/hail/python/test/hail/linalg/test_linalg.py
+++ b/hail/python/test/hail/linalg/test_linalg.py
@@ -1097,11 +1097,11 @@ def test_filtering(self):
with pytest.raises(ValueError) as exc:
bm.filter_cols([0]).filter_cols([3]).to_numpy()
- assert "index" in str(exc)
+ assert "index" in str(exc.value)
with pytest.raises(ValueError) as exc:
bm.filter_rows([0]).filter_rows([3]).to_numpy()
- assert "index" in str(exc)
+ assert "index" in str(exc.value)
@skip_unless_spark_backend()
def test_sparsify_blocks(self):
diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala
index 15fffb0d966..c94265dc509 100644
--- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala
+++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala
@@ -4,7 +4,6 @@ import java.io._
import java.net._
import java.nio.charset.StandardCharsets
import java.util.concurrent._
-
import is.hail.HailContext
import is.hail.annotations._
import is.hail.asm4s._
@@ -33,6 +32,7 @@ import org.json4s.{DefaultFormats, Formats}
import org.newsclub.net.unix.{AFUNIXSocket, AFUNIXSocketAddress, AFUNIXServerSocket}
+import java.nio.charset.Charset
import scala.collection.mutable
import scala.reflect.ClassTag
import scala.annotation.switch
@@ -89,7 +89,7 @@ object Worker {
val fs = retryTransientErrors {
using(new FileInputStream(s"$scratchDir/gsa-key/key.json")) { is =>
- new GoogleStorageFS(IOUtils.toString(is))
+ new GoogleStorageFS(IOUtils.toString(is, Charset.defaultCharset()))
}
}
diff --git a/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala b/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala
index 78da7ed33ab..c04d672e55d 100644
--- a/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala
+++ b/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala
@@ -140,10 +140,10 @@ object JSONAnnotationImpex {
}
}
- def irImportAnnotation(s: String, t: Type): Row = {
+ def irImportAnnotation(s: String, t: Type, warnContext: mutable.HashSet[String]): Row = {
try {
// wraps in a Row to handle returned missingness
- Row(importAnnotation(JsonMethods.parse(s), t, true, null))
+ Row(importAnnotation(JsonMethods.parse(s), t, true, warnContext))
} catch {
case e: Throwable =>
fatal(s"Error parsing JSON:\n type: $t\n value: $s", e)
diff --git a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala
index cc2c4e57dd1..33338e79c4e 100644
--- a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala
@@ -21,8 +21,7 @@ object RelationalSpec {
implicit val formats: Formats = new DefaultFormats() {
override val typeHints = ShortTypeHints(List(
classOf[ComponentSpec], classOf[RVDComponentSpec], classOf[PartitionCountsComponentSpec],
- classOf[RelationalSpec], classOf[MatrixTableSpec], classOf[TableSpec]))
- override val typeHintFieldName = "name"
+ classOf[RelationalSpec], classOf[MatrixTableSpec], classOf[TableSpec]), typeHintFieldName="name")
} +
new TableTypeSerializer +
new MatrixTypeSerializer
@@ -150,8 +149,7 @@ object MatrixTableSpec {
def fromJValue(fs: FS, path: String, jv: JValue): MatrixTableSpec = {
implicit val formats: Formats = new DefaultFormats() {
override val typeHints = ShortTypeHints(List(
- classOf[ComponentSpec], classOf[RVDComponentSpec], classOf[PartitionCountsComponentSpec]))
- override val typeHintFieldName = "name"
+ classOf[ComponentSpec], classOf[RVDComponentSpec], classOf[PartitionCountsComponentSpec]), typeHintFieldName = "name")
} +
new MatrixTypeSerializer
val params = jv.extract[MatrixTableSpecParameters]
diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala
index 4c4e866fe84..bbe0e9289d4 100644
--- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala
@@ -92,8 +92,8 @@ case class BlockMatrixRead(reader: BlockMatrixReader) extends BlockMatrixIR {
object BlockMatrixReader {
implicit val formats: Formats = new DefaultFormats() {
override val typeHints = ShortTypeHints(
- List(classOf[BlockMatrixNativeReader], classOf[BlockMatrixBinaryReader], classOf[BlockMatrixPersistReader]))
- override val typeHintFieldName: String = "name"
+ List(classOf[BlockMatrixNativeReader], classOf[BlockMatrixBinaryReader], classOf[BlockMatrixPersistReader]),
+ typeHintFieldName = "name")
}
def fromJValue(ctx: ExecuteContext, jv: JValue): BlockMatrixReader = {
@@ -270,6 +270,17 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen
override protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = {
val prev = child.execute(ctx)
+ val functionArgs = f match {
+ case ApplyUnaryPrimOp(_, arg1) => IndexedSeq(arg1)
+ case Apply(_, _, args, _) => args
+ case ApplyBinaryPrimOp(_, l, r) => IndexedSeq(l, r)
+ }
+
+ assert(functionArgs.forall(ir => IsConstant(ir) || ir.isInstanceOf[Ref]),
+ "Spark backend without lowering does not support general mapping over " +
+ "BlockMatrix entries. Use predefined functions like `BlockMatrix.abs`.")
+
+
val (name, breezeF): (String, DenseMatrix[Double] => DenseMatrix[Double]) = f match {
case ApplyUnaryPrimOp(Negate(), _) => ("negate", BlockMatrix.negationOp)
case Apply("abs", _, _, _) => ("abs", numerics.abs(_))
@@ -457,7 +468,7 @@ case class BlockMatrixDot(left: BlockMatrixIR, right: BlockMatrixIR) extends Blo
val (tensorShape, isRowVector) = BlockMatrixIR.matrixShapeToTensorShape(lRows, rCols)
val sparsity = if (left.typ.isSparse || right.typ.isSparse)
- BlockMatrixSparsity(
+ BlockMatrixSparsity.constructFromShapeAndFunction(
BlockMatrixType.numBlocks(lRows, blockSize),
BlockMatrixType.numBlocks(rCols, blockSize)) { (i: Int, j: Int) =>
Array.tabulate(BlockMatrixType.numBlocks(rCols, blockSize)) { k =>
@@ -527,14 +538,14 @@ case class BlockMatrixBroadcast(
BlockMatrixSparsity.dense
case IndexedSeq(0) => // broadcast col vector
assert(Set(1, shape(0)) == Set(child.typ.nRows, child.typ.nCols))
- BlockMatrixSparsity(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(0 -> j))
+ BlockMatrixSparsity.constructFromShapeAndFunction(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(0 -> j))
case IndexedSeq(1) => // broadcast row vector
assert(Set(1, shape(1)) == Set(child.typ.nRows, child.typ.nCols))
- BlockMatrixSparsity(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(i -> 0))
+ BlockMatrixSparsity.constructFromShapeAndFunction(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(i -> 0))
case IndexedSeq(0, 0) => // diagonal as col vector
assert(shape(0) == 1L)
assert(shape(1) == java.lang.Math.min(child.typ.nRows, child.typ.nCols))
- BlockMatrixSparsity(nRowBlocks, nColBlocks)((_, j: Int) => child.typ.hasBlock(j -> j))
+ BlockMatrixSparsity.constructFromShapeAndFunction(nRowBlocks, nColBlocks)((_, j: Int) => child.typ.hasBlock(j -> j))
case IndexedSeq(1, 0) => // transpose
assert(child.typ.blockSize == blockSize)
assert(shape(0) == child.typ.nCols && shape(1) == child.typ.nRows)
@@ -613,11 +624,11 @@ case class BlockMatrixAgg(
outIndexExpr match {
case IndexedSeq() => BlockMatrixSparsity.dense
case IndexedSeq(1) => // col vector result; agg over row
- BlockMatrixSparsity(child.typ.nRowBlocks, 1) { (i, _) =>
+ BlockMatrixSparsity.constructFromShapeAndFunction(child.typ.nRowBlocks, 1) { (i, _) =>
(0 until child.typ.nColBlocks).exists(j => child.typ.hasBlock(i -> j))
}
case IndexedSeq(0) => // row vector result; agg over col
- BlockMatrixSparsity(1, child.typ.nColBlocks) { (_, j) =>
+ BlockMatrixSparsity.constructFromShapeAndFunction(1, child.typ.nColBlocks) { (_, j) =>
(0 until child.typ.nRowBlocks).exists(i => child.typ.hasBlock(i -> j))
}
}
@@ -733,7 +744,7 @@ case class BandSparsifier(blocksOnly: Boolean, l: Long, u: Long) extends BlockMa
val leftBuffer = java.lang.Math.floorDiv(-l, childType.blockSize)
val rightBuffer = java.lang.Math.floorDiv(u, childType.blockSize)
- BlockMatrixSparsity(childType.nRowBlocks, childType.nColBlocks) { (i, j) =>
+ BlockMatrixSparsity.constructFromShapeAndFunction(childType.nRowBlocks, childType.nColBlocks) { (i, j) =>
j >= (i - leftBuffer) && j <= (i + rightBuffer) && childType.hasBlock(i -> j)
}
}
@@ -753,7 +764,7 @@ case class RowIntervalSparsifier(blocksOnly: Boolean, starts: IndexedSeq[Long],
val blockStarts = starts.grouped(childType.blockSize).map(idxs => childType.getBlockIdx(idxs.min)).toArray
val blockStops = stops.grouped(childType.blockSize).map(idxs => childType.getBlockIdx(idxs.max - 1)).toArray
- BlockMatrixSparsity(childType.nRowBlocks, childType.nColBlocks) { (i, j) =>
+ BlockMatrixSparsity.constructFromShapeAndFunction(childType.nRowBlocks, childType.nColBlocks) { (i, j) =>
blockStarts(i) <= j && blockStops(i) >= j && childType.hasBlock(i -> j)
}
}
@@ -800,7 +811,7 @@ case class PerBlockSparsifier(blocks: IndexedSeq[Int]) extends BlockMatrixSparsi
val blockSet = blocks.toSet
override def definedBlocks(childType: BlockMatrixType): BlockMatrixSparsity = {
- BlockMatrixSparsity(childType.nRowBlocks, childType.nColBlocks){ case(i: Int, j: Int) =>
+ BlockMatrixSparsity.constructFromShapeAndFunction(childType.nRowBlocks, childType.nColBlocks){ case(i: Int, j: Int) =>
blockSet.contains(i + j * childType.nRowBlocks)
}
}
diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala
index bd81517bec7..cc8dc82003b 100644
--- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala
@@ -22,8 +22,7 @@ object BlockMatrixWriter {
override val typeHints = ShortTypeHints(
List(classOf[BlockMatrixNativeWriter], classOf[BlockMatrixBinaryWriter], classOf[BlockMatrixRectanglesWriter],
classOf[BlockMatrixBinaryMultiWriter], classOf[BlockMatrixTextMultiWriter],
- classOf[BlockMatrixPersistWriter], classOf[BlockMatrixNativeMultiWriter]))
- override val typeHintFieldName: String = "name"
+ classOf[BlockMatrixPersistWriter], classOf[BlockMatrixNativeMultiWriter]), typeHintFieldName = "name")
}
}
diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala
index 7035797f6e2..c905f5cd27b 100644
--- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala
@@ -515,7 +515,7 @@ class Emit[C](
case If(cond, cnsq, altr) =>
assert(cnsq.typ == TVoid && altr.typ == TVoid)
- emitI(cond).consume(cb, {}, m => cb.ifx(m.tcode[Boolean], emitVoid(cnsq), emitVoid(altr)))
+ emitI(cond).consume(cb, {}, m => cb.ifx(m.asBoolean.boolCode(cb), emitVoid(cnsq), emitVoid(altr)))
case Let(name, value, body) => value.pType match {
case streamType: PCanonicalStream =>
@@ -1011,7 +1011,7 @@ class Emit[C](
FastIndexedSeq(typeInfo[Region], keyValTyp.asEmitParam, keyValTyp.asEmitParam),
BooleanInfo)
isSame.emitWithBuilder { cb =>
- emitInMethod(cb, compare2).consumeCode[Boolean](cb, true, _.tcode[Boolean])
+ emitInMethod(cb, compare2).consumeCode[Boolean](cb, true, _.asBoolean.boolCode(cb))
}
val eltIdx = mb.newLocal[Int]("groupByKey_eltIdx")
@@ -2186,7 +2186,10 @@ class Emit[C](
val cmp2 = ApplyComparisonOp(EQWithNA(eltVType), In(0, eltType), In(1, eltType))
InferPType(cmp2)
val EmitCode(s, m, pv) = emitInMethod(cmp2, discardNext)
- discardNext.emit(Code(s, m || pv.tcode[Boolean]))
+ discardNext.emitWithBuilder { cb =>
+ cb += s
+ m || pv.asBoolean.boolCode(cb)
+ }
val lessThan = ApplyComparisonOp(Compare(eltVType), In(0, eltType), In(1, eltType)) < 0
InferPType(lessThan)
(a, lessThan, sorter.distinctFromSorted { (r, v1, m1, v2, m2) =>
@@ -2208,7 +2211,10 @@ class Emit[C](
val cmp2 = ApplyComparisonOp(EQWithNA(keyType.virtualType), k0, k1).deepCopy()
InferPType(cmp2)
val EmitCode(s, m, pv) = emitInMethod(cmp2, discardNext)
- discardNext.emit(Code(s, m || pv.tcode[Boolean]))
+ discardNext.emitWithBuilder { cb =>
+ cb += s
+ m || pv.asBoolean.boolCode(cb)
+ }
val lessThan = (ApplyComparisonOp(Compare(keyType.virtualType), k0, k1) < 0).deepCopy()
InferPType(lessThan)
(a, lessThan, Code(sorter.pruneMissing, sorter.distinctFromSorted { (r, v1, m1, v2, m2) =>
@@ -2963,4 +2969,3 @@ abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]])
finish(cb)
}
}
-
diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala
index b04c961a373..6425b263df6 100644
--- a/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala
@@ -1954,7 +1954,7 @@ object EmitStream {
emitIR(condIR).flatMap(cb) { cond =>
val xCond = mb.genFieldThisRef[Boolean]("stream_if_cond")
- cb += (xCond := cond.tcode[Boolean])
+ cb.assign(xCond, cond.asBoolean.boolCode(cb))
var leftSS: SizedStream = null
var rightSS: SizedStream = null
val Lmissing = CodeLabel()
diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala
index e0c06538520..8adf9632e48 100644
--- a/hail/src/main/scala/is/hail/expr/ir/IR.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/IR.scala
@@ -649,9 +649,8 @@ object PartitionReader {
classOf[PartitionNativeReaderIndexed],
classOf[PartitionZippedNativeReader],
classOf[AbstractTypedCodecSpec],
- classOf[TypedCodecSpec])
- ) + BufferSpec.shortTypeHints
- override val typeHintFieldName = "name"
+ classOf[TypedCodecSpec]),
+ typeHintFieldName = "name") + BufferSpec.shortTypeHints
} +
new TStructSerializer +
new TypeSerializer +
@@ -664,9 +663,8 @@ object PartitionWriter {
override val typeHints = ShortTypeHints(List(
classOf[PartitionNativeWriter],
classOf[AbstractTypedCodecSpec],
- classOf[TypedCodecSpec])
+ classOf[TypedCodecSpec]), typeHintFieldName = "name"
) + BufferSpec.shortTypeHints
- override val typeHintFieldName = "name"
} +
new TStructSerializer +
new TypeSerializer +
@@ -683,9 +681,9 @@ object MetadataWriter {
classOf[RelationalWriter],
classOf[RVDSpecMaker],
classOf[AbstractTypedCodecSpec],
- classOf[TypedCodecSpec])
+ classOf[TypedCodecSpec]),
+ typeHintFieldName = "name"
) + BufferSpec.shortTypeHints
- override val typeHintFieldName = "name"
} +
new TStructSerializer +
new TypeSerializer +
diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala
index e22acac94b8..e72c37ed32a 100644
--- a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala
@@ -28,8 +28,7 @@ object MatrixWriter {
implicit val formats: Formats = new DefaultFormats() {
override val typeHints = ShortTypeHints(
List(classOf[MatrixNativeWriter], classOf[MatrixVCFWriter], classOf[MatrixGENWriter],
- classOf[MatrixBGENWriter], classOf[MatrixPLINKWriter], classOf[WrappedMatrixWriter]))
- override val typeHintFieldName = "name"
+ classOf[MatrixBGENWriter], classOf[MatrixPLINKWriter], classOf[WrappedMatrixWriter]), typeHintFieldName = "name")
}
}
@@ -339,8 +338,7 @@ case class MatrixPLINKWriter(
object MatrixNativeMultiWriter {
implicit val formats: Formats = new DefaultFormats() {
- override val typeHints = ShortTypeHints(List(classOf[MatrixNativeMultiWriter]))
- override val typeHintFieldName = "name"
+ override val typeHints = ShortTypeHints(List(classOf[MatrixNativeMultiWriter]), typeHintFieldName = "name")
}
}
diff --git a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala
index 742443d35e9..76cd2ee8723 100644
--- a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala
@@ -21,10 +21,9 @@ import is.hail.variant.ReferenceGenome
import org.json4s.{DefaultFormats, Formats, ShortTypeHints}
object TableWriter {
- implicit val formats: Formats = new DefaultFormats() {
+ implicit val formats: Formats = new DefaultFormats() {
override val typeHints = ShortTypeHints(
- List(classOf[TableNativeWriter], classOf[TableTextWriter]))
- override val typeHintFieldName = "name"
+ List(classOf[TableNativeWriter], classOf[TableTextWriter]), typeHintFieldName = "name")
}
}
diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala
index a8bd02549bb..9d36254542f 100644
--- a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala
@@ -5,6 +5,7 @@ import is.hail.types.virtual.Type
import is.hail.types.{BlockMatrixType, MatrixType, RTable, TableType, TypeWithRequiredness}
import is.hail.linalg.BlockMatrix
import is.hail.methods._
+import is.hail.utils._
import is.hail.rvd.RVDType
import org.json4s.{Extraction, JValue, ShortTypeHints}
import org.json4s.jackson.{JsonMethods, Serialization}
@@ -129,14 +130,16 @@ object RelationalFunctions {
classOf[WrappedMatrixToTableFunction],
classOf[WrappedMatrixToValueFunction],
classOf[PCRelate]
- ))
+ ), typeHintFieldName = "name")
def extractTo[T: Manifest](ctx: ExecuteContext, config: String): T = {
val jv = JsonMethods.parse(config)
(jv \ "name").extract[String] match {
case "VEP" => VEP.fromJValue(ctx.fs, jv).asInstanceOf[T]
- case _ =>
+ case _ => {
+ log.info("JSON: " + jv.toString)
jv.extract[T]
+ }
}
}
diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala
index fcacbc5be04..e79b3e6752c 100644
--- a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala
@@ -299,8 +299,11 @@ object StringFunctions extends RegistryFunctions {
(rType: Type, _: Seq[PType]) => PType.canonical(rType, true), typeParameters = Array(tv("T"))
) { case (er, cb, _, resultType, Array(s: PStringCode)) =>
- val row = Code.invokeScalaObject2[String, Type, Row](JSONAnnotationImpex.getClass, "irImportAnnotation",
- s.loadString(), er.mb.ecb.getType(resultType.virtualType.asInstanceOf[TTuple].types(0)))
+ val warnCtx = cb.emb.genFieldThisRef[mutable.HashSet[String]]("parse_json_context")
+ cb.ifx(warnCtx.load().isNull, cb.assign(warnCtx, Code.newInstance[mutable.HashSet[String]]()))
+
+ val row = Code.invokeScalaObject3[String, Type, mutable.HashSet[String], Row](JSONAnnotationImpex.getClass, "irImportAnnotation",
+ s.loadString(), er.mb.ecb.getType(resultType.virtualType.asInstanceOf[TTuple].types(0)), warnCtx)
unwrapReturn(cb, er.region, resultType, row)
}
diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala
index 4c9bb06ab1a..4b7be878a58 100644
--- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala
@@ -311,8 +311,10 @@ object LowerBlockMatrixIR {
MakeTuple.ordered(FastSeq(rows, cols))
}.mapBody { (ctx, body) => NDArraySlice(body, GetField(ctx, "new")) }
- case BlockMatrixDensify(child) => unimplemented(bmir)
- case BlockMatrixSparsify(child, sparsifier) => unimplemented(bmir)
+ // Both densify and sparsify change the sparsity pattern tracked on the BlockMatrixType.
+ case BlockMatrixDensify(child) => lower(child)
+ case BlockMatrixSparsify(child, sparsifier) => lower(child)
+
case RelationalLetBlockMatrix(name, value, body) => unimplemented(bmir)
case ValueToBlockMatrix(child, shape, blockSize) if !child.typ.isInstanceOf[TArray] =>
throw new LowererUnsupportedOperation("use explicit broadcast for scalars!")
diff --git a/hail/src/main/scala/is/hail/io/BufferSpecs.scala b/hail/src/main/scala/is/hail/io/BufferSpecs.scala
index abe80042761..b98fca3aa4a 100644
--- a/hail/src/main/scala/is/hail/io/BufferSpecs.scala
+++ b/hail/src/main/scala/is/hail/io/BufferSpecs.scala
@@ -66,7 +66,7 @@ object BufferSpec {
classOf[LEB128BufferSpec],
classOf[BlockingBufferSpec],
classOf[StreamBufferSpec]
- ))
+ ), typeHintFieldName = "name")
}
trait BufferSpec extends Spec {
diff --git a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala
index 657d303e583..e5cb8e3a029 100644
--- a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala
+++ b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala
@@ -30,9 +30,8 @@ object AbstractRVDSpec {
classOf[compatibility.IndexSpec],
classOf[compatibility.UnpartitionedRVDSpec],
classOf[AbstractTypedCodecSpec],
- classOf[TypedCodecSpec])
- ) + BufferSpec.shortTypeHints
- override val typeHintFieldName = "name"
+ classOf[TypedCodecSpec]),
+ typeHintFieldName = "name") + BufferSpec.shortTypeHints
} +
new TStructSerializer +
new TypeSerializer +
diff --git a/hail/src/main/scala/is/hail/types/BlockMatrixType.scala b/hail/src/main/scala/is/hail/types/BlockMatrixType.scala
index ef05efc0f50..84e957b41c5 100644
--- a/hail/src/main/scala/is/hail/types/BlockMatrixType.scala
+++ b/hail/src/main/scala/is/hail/types/BlockMatrixType.scala
@@ -7,11 +7,11 @@ import is.hail.linalg.BlockMatrix
object BlockMatrixSparsity {
private val builder: BoxedArrayBuilder[(Int, Int)] = new BoxedArrayBuilder[(Int, Int)]
- val dense: BlockMatrixSparsity = BlockMatrixSparsity(None)
+ val dense: BlockMatrixSparsity = new BlockMatrixSparsity(None: Option[IndexedSeq[(Int, Int)]])
def apply(definedBlocks: IndexedSeq[(Int, Int)]): BlockMatrixSparsity = BlockMatrixSparsity(Some(definedBlocks))
- def apply(nRows: Int, nCols: Int)(exists: (Int, Int) => Boolean): BlockMatrixSparsity = {
+ def constructFromShapeAndFunction(nRows: Int, nCols: Int)(exists: (Int, Int) => Boolean): BlockMatrixSparsity = {
var i = 0
builder.clear()
while (i < nRows) {
@@ -47,7 +47,7 @@ case class BlockMatrixSparsity(definedBlocks: Option[IndexedSeq[(Int, Int)]]) {
def condense(blockOverlaps: => (Array[Array[Int]], Array[Array[Int]])): BlockMatrixSparsity = {
definedBlocks.map { _ =>
val (ro, co) = blockOverlaps
- BlockMatrixSparsity(ro.length, co.length) { (i, j) =>
+ BlockMatrixSparsity.constructFromShapeAndFunction(ro.length, co.length) { (i, j) =>
ro(i).exists(ii => co(j).exists(jj => hasBlock(ii -> jj)))
}
}.getOrElse(BlockMatrixSparsity.dense)
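For context on the rename above, constructFromShapeAndFunction builds the defined-block set from a block-grid shape and a predicate. The following is a standalone sketch of that behaviour; the real method accumulates into a shared BoxedArrayBuilder and wraps the result in BlockMatrixSparsity, and everything beyond the visible outer row loop is assumed.

object SparsityConstructionSketch {
  // Collect the (rowBlock, colBlock) coordinates for which the predicate holds.
  def constructFromShapeAndFunction(nRows: Int, nCols: Int)(exists: (Int, Int) => Boolean): IndexedSeq[(Int, Int)] =
    for {
      i <- 0 until nRows
      j <- 0 until nCols
      if exists(i, j)
    } yield (i, j)

  def main(args: Array[String]): Unit = {
    // A band pattern on a 4x4 grid of blocks, similar to BandSparsifier above.
    println(constructFromShapeAndFunction(4, 4)((i, j) => math.abs(i - j) <= 1))
  }
}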
diff --git a/monitoring/Dockerfile.test b/monitoring/Dockerfile.test
index 35290dacdd8..c5f31840903 100644
--- a/monitoring/Dockerfile.test
+++ b/monitoring/Dockerfile.test
@@ -1,4 +1,3 @@
FROM {{ service_base_image.image }}
COPY monitoring/test/ /test/
-RUN hail-pip-install pytest-instafail==0.4.1 pytest-asyncio==0.10.0
diff --git a/monitoring/test/test_monitoring.py b/monitoring/test/test_monitoring.py
index 81d695a6f4f..9952c0b9c9c 100644
--- a/monitoring/test/test_monitoring.py
+++ b/monitoring/test/test_monitoring.py
@@ -8,12 +8,11 @@
from hailtop.httpx import client_session
import hailtop.utils as utils
-pytestmark = pytest.mark.asyncio
-
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
+@pytest.mark.asyncio
async def test_billing_monitoring():
deploy_config = get_deploy_config()
monitoring_deploy_config_url = deploy_config.url('monitoring', '/api/v1alpha/billing')
diff --git a/query/query/query.py b/query/query/query.py
index a6ed04646b9..00af2adb260 100644
--- a/query/query/query.py
+++ b/query/query/query.py
@@ -264,15 +264,11 @@ async def on_cleanup(app):
)
-async def on_shutdown(app):
+async def on_shutdown(_):
# Filter the asyncio.current_task(), because if we await
# the current task we'll end up in a deadlock
- remaining_tasks = [
- t for t in asyncio.all_tasks() if t is not asyncio.current_task()
- ]
- log.info(
- f"On shutdown request received, with {len(remaining_tasks)} remaining tasks"
- )
+ remaining_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
+ log.info(f"On shutdown request received, with {len(remaining_tasks)} remaining tasks")
await asyncio.wait(*remaining_tasks)
log.info("All tasks on shutdown have completed")
diff --git a/router/deployment.yaml b/router/deployment.yaml
index 59380108c13..7c9f2a1e32f 100644
--- a/router/deployment.yaml
+++ b/router/deployment.yaml
@@ -176,8 +176,7 @@ metadata:
app: hello
spec:
ports:
- - name: http
- port: 80
+ - port: 443
protocol: TCP
targetPort: 5000
selector:
diff --git a/router/router.nginx.conf.in b/router/router.nginx.conf.in
index 4eca32aec79..2ab84162c1d 100644
--- a/router/router.nginx.conf.in
+++ b/router/router.nginx.conf.in
@@ -66,7 +66,7 @@ server {
server_name hello.*;
location / {
- proxy_pass http://hello/;
+ proxy_pass https://hello/;
include /etc/nginx/proxy.conf;
}
|