Skip to content

Commit

Permalink
Merge branch 'master' of github.com:nchammas/spark
Browse files (browse the repository at this point in the history)
  • Loading branch information
nchammas committed Sep 16, 2014
2 parents 03180a4 + 8051486 commit 0b47ca4
Show file tree
Hide file tree
Showing 243 changed files with 5,725 additions and 3,984 deletions.
14 changes: 14 additions & 0 deletions assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-install-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<!-- Use the shade plugin to create a big JAR with all the dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
Expand Down
2 changes: 0 additions & 2 deletions bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ import org.scalatest.time.SpanSugar._
import org.apache.spark._
import org.apache.spark.storage.StorageLevel

import scala.language.postfixOps

class TestVertex(val active: Boolean, val age: Int) extends Vertex with Serializable
class TestMessage(val targetId: String) extends Message[String] with Serializable

Expand Down
2 changes: 1 addition & 1 deletion bin/beeline
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
set -o posix

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

CLASS="org.apache.hive.beeline.BeeLine"
exec "$FWDIR/bin/spark-class" $CLASS "$@"
13 changes: 7 additions & 6 deletions bin/compute-classpath.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
SCALA_VERSION=2.10

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

. $FWDIR/bin/load-spark-env.sh
. "$FWDIR"/bin/load-spark-env.sh

# Build up classpath
CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
Expand All @@ -43,6 +43,7 @@ if [ -n "$SPARK_PREPEND_CLASSES" ]; then
echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
"classes ahead of assembly." >&2
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/core/target/jars/*"
CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
Expand All @@ -63,7 +64,7 @@ else
assembly_folder="$ASSEMBLY_DIR"
fi

num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)"
if [ "$num_jars" -eq "0" ]; then
echo "Failed to find Spark assembly in $assembly_folder"
echo "You need to build Spark before running this program."
Expand All @@ -77,7 +78,7 @@ if [ "$num_jars" -gt "1" ]; then
exit 1
fi

ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)"

# Verify that versions of java used to build the jars and run Spark are compatible
jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
Expand All @@ -103,8 +104,8 @@ else
datanucleus_dir="$FWDIR"/lib_managed/jars
fi

datanucleus_jars=$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")
datanucleus_jars=$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)
datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"

if [ -n "$datanucleus_jars" ]; then
hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
Expand Down
4 changes: 2 additions & 2 deletions bin/load-spark-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ if [ -z "$SPARK_ENV_LOADED" ]; then
export SPARK_ENV_LOADED=1

# Returns the parent of the directory this script lives in.
parent_dir="$(cd `dirname $0`/..; pwd)"
parent_dir="$(cd "`dirname "$0"`"/..; pwd)"

user_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}
user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"

if [ -f "${user_conf_dir}/spark-env.sh" ]; then
# Promote all variable declarations to environment (exported) variables
Expand Down
20 changes: 10 additions & 10 deletions bin/pyspark
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@
#

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

source $FWDIR/bin/utils.sh
source "$FWDIR/bin/utils.sh"

SCALA_VERSION=2.10

function usage() {
echo "Usage: ./bin/pyspark [options]" 1>&2
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
"$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
}

Expand All @@ -48,7 +48,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
fi
fi

. $FWDIR/bin/load-spark-env.sh
. "$FWDIR"/bin/load-spark-env.sh

# Figure out which Python executable to use
if [[ -z "$PYSPARK_PYTHON" ]]; then
Expand All @@ -57,12 +57,12 @@ fi
export PYSPARK_PYTHON

# Add the PySpark classes to the Python path:
export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"

# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
export PYTHONSTARTUP="$FWDIR/python/pyspark/shell.py"

# If IPython options are specified, assume user wants to run IPython
if [[ -n "$IPYTHON_OPTS" ]]; then
Expand Down Expand Up @@ -99,10 +99,10 @@ fi
if [[ "$1" =~ \.py$ ]]; then
echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
primary=$1
primary="$1"
shift
gatherSparkSubmitOpts "$@"
exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
else
# PySpark shell requires special handling downstream
export PYSPARK_SHELL=1
Expand Down
8 changes: 4 additions & 4 deletions bin/run-example
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

SCALA_VERSION=2.10

FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
export SPARK_HOME="$FWDIR"
EXAMPLES_DIR="$FWDIR"/examples

Expand All @@ -35,12 +35,12 @@ else
fi

if [ -f "$FWDIR/RELEASE" ]; then
export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`
export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`"
fi

if [[ -z $SPARK_EXAMPLES_JAR ]]; then
if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
echo "You need to build Spark before running this program" 1>&2
exit 1
Expand Down
20 changes: 10 additions & 10 deletions bin/spark-class
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ esac
SCALA_VERSION=2.10

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

. $FWDIR/bin/load-spark-env.sh
. "$FWDIR"/bin/load-spark-env.sh

if [ -z "$1" ]; then
echo "Usage: spark-class <class> [<args>]" 1>&2
Expand Down Expand Up @@ -105,7 +105,7 @@ else
exit 1
fi
fi
JAVA_VERSION=$($RUNNER -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
JAVA_VERSION=$("$RUNNER" -version 2>&1 | sed 's/.* version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')

# Set JAVA_OPTS to be able to load native libraries and to set heap size
if [ "$JAVA_VERSION" -ge 18 ]; then
Expand All @@ -117,7 +117,7 @@ JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e "$FWDIR/conf/java-opts" ] ; then
JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
JAVA_OPTS="$JAVA_OPTS `cat "$FWDIR"/conf/java-opts`"
fi

# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
Expand All @@ -126,21 +126,21 @@ TOOLS_DIR="$FWDIR"/tools
SPARK_TOOLS_JAR=""
if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
# Use the JAR from the SBT build
export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`
export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`"
fi
if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
# Use the JAR from the Maven build
# TODO: this also needs to become an assembly!
export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`
export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`"
fi

# Compute classpath using external script
classpath_output=$($FWDIR/bin/compute-classpath.sh)
classpath_output=$("$FWDIR"/bin/compute-classpath.sh)
if [[ "$?" != "0" ]]; then
echo "$classpath_output"
exit 1
else
CLASSPATH=$classpath_output
CLASSPATH="$classpath_output"
fi

if [[ "$1" =~ org.apache.spark.tools.* ]]; then
Expand All @@ -153,9 +153,9 @@ if [[ "$1" =~ org.apache.spark.tools.* ]]; then
fi

if $cygwin; then
CLASSPATH=`cygpath -wp $CLASSPATH`
CLASSPATH="`cygpath -wp "$CLASSPATH"`"
if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
export SPARK_TOOLS_JAR=`cygpath -w $SPARK_TOOLS_JAR`
export SPARK_TOOLS_JAR="`cygpath -w "$SPARK_TOOLS_JAR"`"
fi
fi
export CLASSPATH
Expand Down
10 changes: 5 additions & 5 deletions bin/spark-shell
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ esac
set -o posix

## Global script variables
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

function usage() {
echo "Usage: ./bin/spark-shell [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
"$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
usage
fi

source $FWDIR/bin/utils.sh
source "$FWDIR"/bin/utils.sh
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"

Expand All @@ -54,11 +54,11 @@ function main() {
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
"$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
stty icanon echo > /dev/null 2>&1
else
export SPARK_SUBMIT_OPTS
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
"$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
fi
}

Expand Down
8 changes: 4 additions & 4 deletions bin/spark-sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
CLASS_NOT_FOUND_EXIT_STATUS=1

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

function usage {
echo "Usage: ./bin/spark-sql [options] [cli option]"
Expand All @@ -38,18 +38,18 @@ function usage {
pattern+="\|--help"
pattern+="\|======="

$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
"$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
echo
echo "CLI options:"
$FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
"$FWDIR"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
usage
exit 0
fi

source $FWDIR/bin/utils.sh
source "$FWDIR"/bin/utils.sh
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"

Expand Down
4 changes: 2 additions & 2 deletions bin/spark-submit
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

# NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!

export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
ORIG_ARGS=("$@")

while (($#)); do
Expand Down Expand Up @@ -59,5 +59,5 @@ if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FI
fi
fi

exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
exec "$SPARK_HOME"/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"

27 changes: 27 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,33 @@
</execution>
</executions>
</plugin>
<!--
Copy guava to the build directory. This is needed to make the SPARK_PREPEND_CLASSES
option work in compute-classpath.sh, since it would put the non-shaded Spark classes in
the runtime classpath.
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<useSubDirectoryPerType>true</useSubDirectoryPerType>
<includeArtifactIds>guava</includeArtifactIds>
<silent>true</silent>
</configuration>
</execution>
</executions>
</plugin>
</plugins>

<resources>
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/SecurityManager.scala
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging {

// always add the current user and SPARK_USER to the viewAcls
private val defaultAclUsers = Set[String](System.getProperty("user.name", ""),
Option(System.getenv("SPARK_USER")).getOrElse(""))
Option(System.getenv("SPARK_USER")).getOrElse("")).filter(!_.isEmpty)

setViewAcls(defaultAclUsers, sparkConf.get("spark.ui.view.acls", ""))
setModifyAcls(defaultAclUsers, sparkConf.get("spark.modify.acls", ""))
Expand Down
Loading

0 comments on commit 0b47ca4

Please sign in to comment.