From 3ac132a222b5bcf1a154161552c88a33096b10e2 Mon Sep 17 00:00:00 2001 From: Uri Laserson Date: Thu, 25 Sep 2014 00:04:13 -0700 Subject: [PATCH] [ADAM-388] Sets Kryo serialization with --conf args Fixes #388 --- bin/adam-pyspark | 5 ++++- bin/adam-shell | 5 ++++- bin/adam-spark-defaults.conf | 20 -------------------- bin/adam-submit | 5 ++++- 4 files changed, 12 insertions(+), 23 deletions(-) delete mode 100644 bin/adam-spark-defaults.conf diff --git a/bin/adam-pyspark b/bin/adam-pyspark index 6026e74327..c2b9491b1c 100755 --- a/bin/adam-pyspark +++ b/bin/adam-pyspark @@ -36,6 +36,9 @@ fi # submit the job to Spark "$SPARK_SHELL" \ - --properties-file "$ADAM_REPO"/bin/adam-spark-defaults.conf \ + --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ + --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \ + --conf spark.kryoserializer.buffer.mb=4 \ + --conf spark.kryo.referenceTracking=true \ --jars "$ADAM_JARS" \ "$@" diff --git a/bin/adam-shell b/bin/adam-shell index 206461eedc..f742776f0a 100755 --- a/bin/adam-shell +++ b/bin/adam-shell @@ -36,6 +36,9 @@ fi # submit the job to Spark "$SPARK_SHELL" \ - --properties-file "$ADAM_REPO"/bin/adam-spark-defaults.conf \ + --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ + --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \ + --conf spark.kryoserializer.buffer.mb=4 \ + --conf spark.kryo.referenceTracking=true \ --jars "$ADAM_JARS" \ "$@" diff --git a/bin/adam-spark-defaults.conf b/bin/adam-spark-defaults.conf deleted file mode 100644 index 87e36a5037..0000000000 --- a/bin/adam-spark-defaults.conf +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to Big Data Genomics (BDG) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The BDG licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -spark.serializer org.apache.spark.serializer.KryoSerializer -spark.kryo.registrator org.bdgenomics.adam.serialization.ADAMKryoRegistrator -spark.kryoserializer.buffer.mb 4 -spark.kryo.referenceTracking true diff --git a/bin/adam-submit b/bin/adam-submit index 1341209322..3712c95604 100755 --- a/bin/adam-submit +++ b/bin/adam-submit @@ -49,7 +49,10 @@ fi # submit the job to Spark "$SPARK_SUBMIT" \ --class org.bdgenomics.adam.cli.ADAMMain \ - --properties-file "$ADAM_REPO"/bin/adam-spark-defaults.conf \ + --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ + --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \ + --conf spark.kryoserializer.buffer.mb=4 \ + --conf spark.kryo.referenceTracking=true \ --jars "$ADAM_JARS" \ "$ADAM_CLI_JAR" \ "$@"