Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[query] Upgrade spark to 3.3.0 and dataproc to 2.1 #12701

Merged
merged 6 commits into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion batch/Dockerfile.worker
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RUN hail-apt-get-install \

COPY docker/requirements.txt .
RUN chmod 755 /bin/hail-pip-install && \
-hail-pip-install -r requirements.txt pyspark==3.2.1
+hail-pip-install -r requirements.txt pyspark==3.3.0

ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark
ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin"
Expand Down
61 changes: 8 additions & 53 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,23 +107,6 @@ steps:
dependsOn:
- hail_ubuntu_image
- merge_code
-- kind: buildImage2
-name: base_spark_3_2
-dockerFile: /io/repo/docker/Dockerfile.base-with-spark-3-2
-contextPath: /io/repo
-publishAs: base_spark_3_2
-resources:
-storage: 10Gi
-cpu: "2"
-memory: standard
-inputs:
-- from: /repo/docker/Dockerfile.base-with-spark-3-2
-to: /io/repo/docker/Dockerfile.base-with-spark-3-2
-- from: /repo/docker/core-site.xml
-to: /io/repo/docker/core-site.xml
-dependsOn:
-- base_image
-- merge_code
- kind: runImage
name: check_pip_dependencies
image:
Expand Down Expand Up @@ -706,34 +689,6 @@ steps:
dependsOn:
- hail_build_image
- merge_code
-- kind: runImage
-name: build_hail_jar_and_wheel_only_spark_3_2
-image:
-valueFrom: hail_build_image.image
-resources:
-memory: standard
-cpu: '4'
-script: |
-set -ex
-cd /io/repo/hail
-chmod 755 ./gradlew
-time retry ./gradlew --version
-export SPARK_VERSION="3.2.1" SCALA_VERSION="2.12.12"
-time retry make jars wheel
-(cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl)
-inputs:
-- from: /repo
-to: /io/repo
-outputs:
-- from: /io/repo/hail/build/libs/hail-all-spark.jar
-to: /just-jar/spark-32/hail.jar
-- from: /io/repo/hail/build/libs/hail-all-spark-test.jar
-to: /just-jar/spark-32/hail-test.jar
-- from: /io/repo/hail/build/deploy/dist/wheel-container.tar
-to: /just-wheel/spark-32/wheel-container.tar
-dependsOn:
-- hail_build_image
-- merge_code
- kind: runImage
name: build_hail
image:
Expand Down Expand Up @@ -2201,15 +2156,15 @@ steps:
valueFrom: default_ns.name
mountPath: /batch-gsa-key
inputs:
-- from: /just-jar/spark-32/hail.jar
+- from: /just-jar/hail.jar
to: /io/hail.jar
- from: /git_version
to: /io/git_version
dependsOn:
- default_ns
- deploy_batch
- hailgenetics_hailtop_image
-- build_hail_jar_and_wheel_only_spark_3_2
+- build_hail_jar_and_wheel_only
- merge_code
- create_test_gsa_keys
- kind: deploy
Expand Down Expand Up @@ -2264,7 +2219,7 @@ steps:
test
timeout: 5400
inputs:
-- from: /just-wheel/spark-32/wheel-container.tar
+- from: /just-wheel/wheel-container.tar
to: /io/wheel-container.tar
- from: /repo/hail/python/test
to: /io/repo/hail/python/test
Expand Down Expand Up @@ -2295,7 +2250,7 @@ steps:
- hail_run_image
- upload_query_jar
- upload_test_resources_to_blob_storage
-- build_hail_jar_and_wheel_only_spark_3_2
+- build_hail_jar_and_wheel_only
- kind: buildImage2
name: netcat_ubuntu_image
publishAs: netcat
Expand Down Expand Up @@ -3194,7 +3149,7 @@ steps:
- kind: runImage
name: test_hail_scala_fs
image:
-valueFrom: base_spark_3_2.image
+valueFrom: hail_run_image.image
resources:
memory: standard
cpu: '2'
Expand Down Expand Up @@ -3228,7 +3183,7 @@ steps:
inputs:
- from: /resources.tar.gz
to: /io/resources.tar.gz
-- from: /just-jar/spark-32/hail-test.jar
+- from: /hail-test.jar
to: /io/hail-test.jar
- from: /testng-fs.xml
to: /io/testng-fs.xml
Expand All @@ -3254,9 +3209,9 @@ steps:
- default_ns
- create_certs
- create_accounts
-- base_spark_3_2
+- hail_run_image
- build_hail
-- build_hail_jar_and_wheel_only_spark_3_2
+- build_hail_jar_and_wheel_only
- upload_test_resources_to_blob_storage
- kind: runImage
name: test_hail_services_java
Expand Down
3 changes: 1 addition & 2 deletions hail/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ REVISION := $(shell git rev-parse HEAD)
SHORT_REVISION := $(shell git rev-parse --short=12 HEAD)
BRANCH := $(shell git rev-parse --abbrev-ref HEAD)
SCALA_VERSION ?= 2.12.13
-SPARK_VERSION ?= 3.1.3
+SPARK_VERSION ?= 3.3.0
HAIL_MAJOR_MINOR_VERSION := 0.2
HAIL_PATCH_VERSION := 109
HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION)
Expand Down Expand Up @@ -314,7 +314,6 @@ upload-qob-test-resources: $(JAR_TEST_SOURCES)
# target must be run at least once a day if using a dev NAMESPACE.
# To trigger this target to re-run,
# > rm upload-qob-jar
-upload-qob-jar: SPARK_VERSION := 3.2.1
upload-qob-jar: $(SHADOW_JAR)
! [ -z $(NAMESPACE) ] # call this like: make upload-qob-jar NAMESPACE=default
gsutil -m cp $(SHADOW_JAR) $(JAR_LOCATION)
Expand Down
20 changes: 4 additions & 16 deletions hail/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ tasks.withType(JavaCompile) {
project.ext {
breezeVersion = "1.1"

-sparkVersion = System.getProperty("spark.version", "3.1.3")
+sparkVersion = System.getProperty("spark.version", "3.3.0")

-if (sparkVersion != "3.1.3") {
-project.logger.lifecycle("WARNING: Hail primarily tested with Spark 3.1.3, use other versions at your own risk.")
+if (sparkVersion != "3.3.0") {
+project.logger.lifecycle("WARNING: Hail primarily tested with Spark 3.3.0, use other versions at your own risk.")
}
scalaVersion = System.getProperty("scala.version", "2.12.13")
scalaMajorVersion = (scalaVersion =~ /^\d+.\d+/)[0]
Expand Down Expand Up @@ -203,38 +203,26 @@ dependencies {
exclude group: 'com.fasterxml.jackson'
exclude group: 'com.fasterxml.jackson.core'
exclude group: 'com.fasterxml.jackson.databind'
-if (sparkVersion.startsWith("3.1")) {
-exclude group: 'com.fasterxml.jackson.dataformat'
-exclude group: 'org.codehaus.woodstox'
-}
}

bundled(group: 'com.azure', name: 'azure-core-http-netty', version: '1.10.0') {
exclude group: 'com.fasterxml.jackson'
exclude group: 'com.fasterxml.jackson.core'
exclude group: 'com.fasterxml.jackson.databind'
-if (sparkVersion.startsWith("3.1")) {
-exclude group: 'com.fasterxml.jackson.dataformat'
-exclude group: 'org.codehaus.woodstox'
-}
}


bundled(group: 'com.azure', name: 'azure-identity', version:'1.2.1') {
exclude group: 'com.fasterxml.jackson'
exclude group: 'com.fasterxml.jackson.core'
exclude group: 'com.fasterxml.jackson.databind'
-if (sparkVersion.startsWith("3.1")) {
-exclude group: 'com.fasterxml.jackson.dataformat'
-exclude group: 'org.codehaus.woodstox'
-}
}

bundled group: 'org.freemarker', name: 'freemarker', version: '2.3.31'

bundled 'com.kohlschutter.junixsocket:junixsocket-core:2.6.1'

-bundled 'com.github.luben:zstd-jni:1.4.8-1'
+bundled 'com.github.luben:zstd-jni:1.5.2-1'
}

task(checkSettings) doLast {
Expand Down
Loading