Skip to content

Commit

Permalink
Escape spark.*.extraJavaOptions correctly
Browse files Browse the repository at this point in the history
We previously never handled this correctly: backslashes were
evaluated twice — once when passing spark.*.extraJavaOptions
into SparkSubmit, and again when calling
Utils.splitCommandString.

This means we need to pass the raw values of these configs directly
to the JVM without evaluating the backslashes when launching
SparkSubmit. The way we do this is through a few custom environment
variables.

As of this commit, the user should follow the format outlined in
spark-defaults.conf.template for spark.*.extraJavaOptions, and
the expected java options (with quotes, whitespaces and backslashes
and everything) will be propagated to the driver or the executors
correctly.
  • Loading branch information
andrewor14 committed Aug 7, 2014
1 parent aabfc7e commit a992ae2
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 27 deletions.
81 changes: 54 additions & 27 deletions bin/spark-submit
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ ORIG_ARGS=("$@")
# Load utility functions
. "$SPARK_HOME/bin/utils.sh"

# For client mode, the driver will be launched in the JVM that launches
# SparkSubmit, so we need to handle the class paths, java options, and
# memory pre-emptively in bash. Otherwise, it will be too late by the
# time the JVM has started.

while (($#)); do
if [ "$1" = "--deploy-mode" ]; then
DEPLOY_MODE=$2
Expand All @@ -46,32 +41,64 @@ while (($#)); do
done

DEPLOY_MODE=${DEPLOY_MODE:-"client"}
PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"}
DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}

# Clear any values inherited from the caller's environment so the exports
# below happen only if the properties file actually sets these configs.
# (Fix: previously unset *_JAVA_OPTIONS, which did not match the *_JAVA_OPTS
# variables parsed and exported below, so the unset was a no-op.)
unset DRIVER_EXTRA_JAVA_OPTS
unset EXECUTOR_EXTRA_JAVA_OPTS

# A few Spark configs must be parsed early on before launching the JVM:
#
# [spark.driver.extra*]
#   These configs encode java options, class paths, and library paths
#   needed to launch the JVM if we are running Spark in client mode
#
# [spark.*.extraJavaOptions]
#   The escaped characters in these configs must be preserved for
#   splitting the arguments in Java later. For these configs, we
#   export the raw values as environment variables.
#
if [[ -f "$PROPERTIES_FILE" ]]; then
  echo "Using properties file $PROPERTIES_FILE." 1>&2
  # parse_java_property (from bin/utils.sh) exports the value of the given
  # key into JAVA_PROPERTY_VALUE
  parse_java_property "spark.driver.memory"
  DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
  parse_java_property "spark.driver.extraLibraryPath"
  DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
  parse_java_property "spark.driver.extraClassPath"
  DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
  parse_java_property "spark.driver.extraJavaOptions"
  DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
  parse_java_property "spark.executor.extraJavaOptions"
  EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
  # Fix: test the variables' values ("$VAR"), not the literal variable
  # names, which are always non-empty and made these exports unconditional.
  if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
    export DRIVER_EXTRA_JAVA_OPTS
  fi
  if [[ -n "$EXECUTOR_EXTRA_JAVA_OPTS" ]]; then
    export EXECUTOR_EXTRA_JAVA_OPTS
  fi
elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then
  # Only warn when the user explicitly pointed at a non-default file.
  echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2
fi

# For client mode, the driver will be launched in the JVM that launches
# SparkSubmit, so we need to handle the class paths, java options, and
# memory pre-emptively in bash. Otherwise, it will be too late by the
# time the JVM has started.

if [ $DEPLOY_MODE == "client" ]; then
# Parse the default properties file here for spark.driver.* configs
if [ -f "$PROPERTIES_FILE" ]; then
echo "Using properties file $PROPERTIES_FILE." 1>&2
# This exports the value of the given key into JAVA_PROPERTY_VALUE
parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
parse_java_property "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS"
fi
if [ -n "$DRIVER_EXTRA_CLASSPATH" ]; then
export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH"
fi
if [ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]; then
export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH"
fi
else
echo "Warning: properties file $PROPERTIES_FILE does not exist!" 1>&2
# For client mode, the driver runs in the same JVM that launches SparkSubmit,
# so spark.driver.* settings must be applied here in bash before that JVM
# starts — it would be too late once it is running.
if [[ $DEPLOY_MODE == "client" ]]; then
  if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS"
  fi
  if [[ -n "$DRIVER_EXTRA_CLASSPATH" ]]; then
    export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH"
  fi
  if [[ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]]; then
    export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH"
  fi
  # Favor command line memory over config memory
  DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"}
  # Fix: the span contained both the old `if [ -n ... ]` line and its `[[ ]]`
  # replacement (diff residue), leaving an unmatched `if`; keep only the
  # `[[ ]]` form used throughout this block.
  if [[ -n "$DRIVER_MEMORY" ]]; then
    export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
  fi
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
}
}
}
// For spark.*.extraJavaOptions, we cannot rely on the Java properties loader because it
// un-escapes certain characters (" and \) needed to split the string into java options.
// For these configs, use the equivalent environment variables instead.
sys.env.get("DRIVER_EXTRA_JAVA_OPTS").foreach { opts =>
defaultProperties("spark.driver.extraJavaOptions") = opts
}
sys.env.get("EXECUTOR_EXTRA_JAVA_OPTS").foreach { opts =>
defaultProperties("spark.executor.extraJavaOptions") = opts
}
defaultProperties
}

Expand Down

0 comments on commit a992ae2

Please sign in to comment.