diff --git a/.gitignore b/.gitignore index b54a3058de659..4f177c82ae5e0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ sbt/*.jar .settings .cache -.generated-mima-excludes +.generated-mima* /build/ work/ out/ diff --git a/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala b/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala index 110bd0a9a0c41..55241d33cd3f0 100644 --- a/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala +++ b/bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala @@ -80,7 +80,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo test("large number of iterations") { // This tests whether jobs with a large number of iterations finish in a reasonable time, // because non-memoized recursion in RDD or DAGScheduler used to cause them to hang - failAfter(10 seconds) { + failAfter(30 seconds) { sc = new SparkContext("local", "test") val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0)))) val msgs = sc.parallelize(Array[(String, TestMessage)]()) @@ -101,7 +101,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo sc = new SparkContext("local", "test") val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0)))) val msgs = sc.parallelize(Array[(String, TestMessage)]()) - val numSupersteps = 50 + val numSupersteps = 20 val result = Bagel.run(sc, verts, msgs, sc.defaultParallelism, StorageLevel.DISK_ONLY) { (self: TestVertex, msgs: Option[Array[TestMessage]], superstep: Int) => diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 7df43a555d562..2cf4e381c1c88 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -38,8 +38,10 @@ else JAR_CMD="jar" fi -# First check if we have a dependencies jar. If so, include binary classes with the deps jar -if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then +# A developer option to prepend more recently compiled Spark classes +if [ -n "$SPARK_PREPEND_CLASSES" ]; then + echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\ + "classes ahead of assembly." >&2 CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes" @@ -51,17 +53,31 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes" +fi - ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar 2>/dev/null) +# Use spark-assembly jar from either RELEASE or assembly directory +if [ -f "$FWDIR/RELEASE" ]; then + assembly_folder="$FWDIR"/lib else - # Else use spark-assembly jar from either RELEASE or assembly directory - if [ -f "$FWDIR/RELEASE" ]; then - ASSEMBLY_JAR=$(ls "$FWDIR"/lib/spark-assembly*hadoop*.jar 2>/dev/null) - else - ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar 2>/dev/null) - fi + assembly_folder="$ASSEMBLY_DIR" fi +num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l) +if [ "$num_jars" -eq "0" ]; then + echo "Failed to find Spark assembly in $assembly_folder" + echo "You need to build Spark before running this program." 
+ exit 1 +fi +if [ "$num_jars" -gt "1" ]; then + jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar") + echo "Found multiple Spark assembly jars in $assembly_folder:" + echo "$jars_list" + echo "Please remove all but one jar." + exit 1 +fi + +ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null) + # Verify that versions of java used to build the jars and run Spark are compatible jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1) if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then diff --git a/bin/pyspark b/bin/pyspark index d0fa56f31913f..0b5ed40e2157d 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -45,7 +45,7 @@ fi . $FWDIR/bin/load-spark-env.sh # Figure out which Python executable to use -if [ -z "$PYSPARK_PYTHON" ] ; then +if [[ -z "$PYSPARK_PYTHON" ]]; then PYSPARK_PYTHON="python" fi export PYSPARK_PYTHON @@ -59,7 +59,7 @@ export OLD_PYTHONSTARTUP=$PYTHONSTARTUP export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py # If IPython options are specified, assume user wants to run IPython -if [ -n "$IPYTHON_OPTS" ]; then +if [[ -n "$IPYTHON_OPTS" ]]; then IPYTHON=1 fi @@ -76,6 +76,16 @@ for i in "$@"; do done export PYSPARK_SUBMIT_ARGS +# For pyspark tests +if [[ -n "$SPARK_TESTING" ]]; then + if [[ -n "$PYSPARK_DOC_TEST" ]]; then + exec "$PYSPARK_PYTHON" -m doctest $1 + else + exec "$PYSPARK_PYTHON" $1 + fi + exit +fi + # If a python file is provided, directly run spark-submit. if [[ "$1" =~ \.py$ ]]; then echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2 diff --git a/bin/spark-class b/bin/spark-class index e884511010c6c..cfe363a71da31 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -108,23 +108,6 @@ fi export JAVA_OPTS # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala! -if [ ! -f "$FWDIR/RELEASE" ]; then - # Exit if the user hasn't compiled Spark - num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l) - jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar") - if [ "$num_jars" -eq "0" ]; then - echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2 - echo "You need to build Spark before running this program." >&2 - exit 1 - fi - if [ "$num_jars" -gt "1" ]; then - echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2 - echo "$jars_list" - echo "Please remove all but one jar." - exit 1 - fi -fi - TOOLS_DIR="$FWDIR"/tools SPARK_TOOLS_JAR="" if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index e2d2250982daa..bf3c3a6ceb5ef 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -96,7 +96,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { } /** Register a ShuffleDependency for cleanup when it is garbage collected. 
*/ - def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _]) { + def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _, _]) { registerForCleanup(shuffleDependency, CleanShuffle(shuffleDependency.shuffleId)) } diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index 2c31cc20211ff..c8c194a111aac 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -20,6 +20,7 @@ package org.apache.spark import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD import org.apache.spark.serializer.Serializer +import org.apache.spark.shuffle.ShuffleHandle /** * :: DeveloperApi :: @@ -50,19 +51,24 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) { * Represents a dependency on the output of a shuffle stage. * @param rdd the parent RDD * @param partitioner partitioner used to partition the shuffle output - * @param serializer [[org.apache.spark.serializer.Serializer Serializer]] to use. If set to null, + * @param serializer [[org.apache.spark.serializer.Serializer Serializer]] to use. If set to None, * the default serializer, as specified by `spark.serializer` config option, will * be used. */ @DeveloperApi -class ShuffleDependency[K, V]( +class ShuffleDependency[K, V, C]( @transient rdd: RDD[_ <: Product2[K, V]], val partitioner: Partitioner, - val serializer: Serializer = null) + val serializer: Option[Serializer] = None, + val keyOrdering: Option[Ordering[K]] = None, + val aggregator: Option[Aggregator[K, V, C]] = None) extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) { val shuffleId: Int = rdd.context.newShuffleId() + val shuffleHandle: ShuffleHandle = rdd.context.env.shuffleManager.registerShuffle( + shuffleId, rdd.partitions.size, this) + rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this)) } diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index d721aba709600..35970c2f50892 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -290,6 +290,9 @@ class SparkContext(config: SparkConf) extends Logging { value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} { executorEnvs(envKey) = value } + Option(System.getenv("SPARK_PREPEND_CLASSES")).foreach { v => + executorEnvs("SPARK_PREPEND_CLASSES") = v + } // The Mesos scheduler backend relies on this environment variable to set executor memory. // TODO: Set this only in the Mesos scheduler. executorEnvs("SPARK_EXECUTOR_MEMORY") = executorMemory + "m" @@ -297,7 +300,7 @@ class SparkContext(config: SparkConf) extends Logging { // Set SPARK_USER for user who is running SparkContext. val sparkUser = Option { - Option(System.getProperty("user.name")).getOrElse(System.getenv("SPARK_USER")) + Option(System.getenv("SPARK_USER")).getOrElse(System.getProperty("user.name")) }.getOrElse { SparkContext.SPARK_UNKNOWN_USER } @@ -431,12 +434,21 @@ class SparkContext(config: SparkConf) extends Logging { // Methods for creating RDDs - /** Distribute a local Scala collection to form an RDD. */ + /** Distribute a local Scala collection to form an RDD. + * + * @note Parallelize acts lazily. 
If `seq` is a mutable collection and is + * altered after the call to parallelize and before the first action on the + * RDD, the resultant RDD will reflect the modified collection. Pass a copy of + * the argument to avoid this. + */ def parallelize[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]()) } - /** Distribute a local Scala collection to form an RDD. */ + /** Distribute a local Scala collection to form an RDD. + * + * This method is identical to `parallelize`. + */ def makeRDD[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { parallelize(seq, numSlices) } @@ -823,9 +835,11 @@ class SparkContext(config: SparkConf) extends Logging { } /** + * :: DeveloperApi :: * Return information about what RDDs are cached, if they are in mem or on disk, how much space * they take, etc. */ + @DeveloperApi def getRDDStorageInfo: Array[RDDInfo] = { StorageUtils.rddInfoFromStorageStatus(getExecutorStorageStatus, this) } @@ -837,8 +851,10 @@ class SparkContext(config: SparkConf) extends Logging { def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap /** + * :: DeveloperApi :: * Return information about blocks stored in all of the slaves */ + @DeveloperApi def getExecutorStorageStatus: Array[StorageStatus] = { env.blockManager.master.getStorageStatus } diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 720151a6b0f84..8dfa8cc4b5b3f 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -34,6 +34,7 @@ import org.apache.spark.metrics.MetricsSystem import org.apache.spark.network.ConnectionManager import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.serializer.Serializer +import org.apache.spark.shuffle.ShuffleManager import org.apache.spark.storage._ import org.apache.spark.util.{AkkaUtils, Utils} @@ -56,7 +57,7 @@ class SparkEnv ( val closureSerializer: Serializer, val cacheManager: CacheManager, val mapOutputTracker: MapOutputTracker, - val shuffleFetcher: ShuffleFetcher, + val shuffleManager: ShuffleManager, val broadcastManager: BroadcastManager, val blockManager: BlockManager, val connectionManager: ConnectionManager, @@ -80,7 +81,7 @@ class SparkEnv ( pythonWorkers.foreach { case(key, worker) => worker.stop() } httpFileServer.stop() mapOutputTracker.stop() - shuffleFetcher.stop() + shuffleManager.stop() broadcastManager.stop() blockManager.stop() blockManager.master.stop() @@ -163,13 +164,20 @@ object SparkEnv extends Logging { def instantiateClass[T](propertyName: String, defaultClassName: String): T = { val name = conf.get(propertyName, defaultClassName) val cls = Class.forName(name, true, Utils.getContextOrSparkClassLoader) - // First try with the constructor that takes SparkConf. If we can't find one, - // use a no-arg constructor instead. 
+ // Look for a constructor taking a SparkConf and a boolean isDriver, then one taking just + // SparkConf, then one taking no arguments try { - cls.getConstructor(classOf[SparkConf]).newInstance(conf).asInstanceOf[T] + cls.getConstructor(classOf[SparkConf], java.lang.Boolean.TYPE) + .newInstance(conf, new java.lang.Boolean(isDriver)) + .asInstanceOf[T] } catch { case _: NoSuchMethodException => - cls.getConstructor().newInstance().asInstanceOf[T] + try { + cls.getConstructor(classOf[SparkConf]).newInstance(conf).asInstanceOf[T] + } catch { + case _: NoSuchMethodException => + cls.getConstructor().newInstance().asInstanceOf[T] + } } } @@ -219,9 +227,6 @@ object SparkEnv extends Logging { val cacheManager = new CacheManager(blockManager) - val shuffleFetcher = instantiateClass[ShuffleFetcher]( - "spark.shuffle.fetcher", "org.apache.spark.BlockStoreShuffleFetcher") - val httpFileServer = new HttpFileServer(securityManager) httpFileServer.initialize() conf.set("spark.fileserver.uri", httpFileServer.serverUri) @@ -242,6 +247,9 @@ object SparkEnv extends Logging { "." } + val shuffleManager = instantiateClass[ShuffleManager]( + "spark.shuffle.manager", "org.apache.spark.shuffle.hash.HashShuffleManager") + // Warn about deprecated spark.cache.class property if (conf.contains("spark.cache.class")) { logWarning("The spark.cache.class property is no longer being used! Specify storage " + @@ -255,7 +263,7 @@ object SparkEnv extends Logging { closureSerializer, cacheManager, mapOutputTracker, - shuffleFetcher, + shuffleManager, broadcastManager, blockManager, connectionManager, diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 7dcfbf741c4f1..14fa9d8135afe 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -228,6 +228,50 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) : PartialResult[java.util.Map[K, BoundedDouble]] = rdd.countByKeyApprox(timeout, confidence).map(mapAsJavaMap) + /** + * Aggregate the values of each key, using given combine functions and a neutral "zero value". + * This function can return a different result type, U, than the type of the values in this RDD, + * V. Thus, we need one operation for merging a V into a U and one operation for merging two U's, + * as in scala.TraversableOnce. The former operation is used for merging values within a + * partition, and the latter is used for merging values between partitions. To avoid memory + * allocation, both of these functions are allowed to modify and return their first argument + * instead of creating a new U. + */ + def aggregateByKey[U](zeroValue: U, partitioner: Partitioner, seqFunc: JFunction2[U, V, U], + combFunc: JFunction2[U, U, U]): JavaPairRDD[K, U] = { + implicit val ctag: ClassTag[U] = fakeClassTag + fromRDD(rdd.aggregateByKey(zeroValue, partitioner)(seqFunc, combFunc)) + } + + /** + * Aggregate the values of each key, using given combine functions and a neutral "zero value". + * This function can return a different result type, U, than the type of the values in this RDD, + * V. Thus, we need one operation for merging a V into a U and one operation for merging two U's, + * as in scala.TraversableOnce. The former operation is used for merging values within a + * partition, and the latter is used for merging values between partitions. 
To avoid memory + * allocation, both of these functions are allowed to modify and return their first argument + * instead of creating a new U. + */ + def aggregateByKey[U](zeroValue: U, numPartitions: Int, seqFunc: JFunction2[U, V, U], + combFunc: JFunction2[U, U, U]): JavaPairRDD[K, U] = { + implicit val ctag: ClassTag[U] = fakeClassTag + fromRDD(rdd.aggregateByKey(zeroValue, numPartitions)(seqFunc, combFunc)) + } + + /** + * Aggregate the values of each key, using given combine functions and a neutral "zero value". + * This function can return a different result type, U, than the type of the values in this RDD, + * V. Thus, we need one operation for merging a V into a U and one operation for merging two U's. + * The former operation is used for merging values within a partition, and the latter is used for + * merging values between partitions. To avoid memory allocation, both of these functions are + * allowed to modify and return their first argument instead of creating a new U. + */ + def aggregateByKey[U](zeroValue: U, seqFunc: JFunction2[U, V, U], combFunc: JFunction2[U, U, U]): + JavaPairRDD[K, U] = { + implicit val ctag: ClassTag[U] = fakeClassTag + fromRDD(rdd.aggregateByKey(zeroValue)(seqFunc, combFunc)) + } + /** * Merge the values for each key using an associative function and a neutral "zero value" which * may be added to the result an arbitrary number of times, and must not change the result diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index aeb159adc31d9..c371dc3a51c73 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -81,7 +81,7 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends case "kill" => val driverId = driverArgs.driverId - val killFuture = masterActor ! RequestKillDriver(driverId) + masterActor ! RequestKillDriver(driverId) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index d27e0e1f15c65..d09136de49807 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -23,9 +23,10 @@ import akka.actor.ActorRef import com.google.common.base.Charsets import com.google.common.io.Files -import org.apache.spark.Logging +import org.apache.spark.{SparkConf, Logging} import org.apache.spark.deploy.{ApplicationDescription, Command, ExecutorState} import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged +import org.apache.spark.util.logging.FileAppender /** * Manages the execution of one executor process. @@ -42,12 +43,15 @@ private[spark] class ExecutorRunner( val sparkHome: File, val workDir: File, val workerUrl: String, + val conf: SparkConf, var state: ExecutorState.Value) extends Logging { val fullId = appId + "/" + execId var workerThread: Thread = null var process: Process = null + var stdoutAppender: FileAppender = null + var stderrAppender: FileAppender = null // NOTE: This is now redundant with the automated shut-down enforced by the Executor. It might // make sense to remove this in the future. 
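The stdoutAppender and stderrAppender fields added above hold org.apache.spark.util.logging.FileAppender instances that replace the old blocking CommandUtils.redirectStream calls (see the following hunks): each appender copies the child process's output stream to a file on a background thread and is stopped when the executor is killed. A minimal sketch of that copy-loop pattern, using a hypothetical StreamToFileAppender rather than the real FileAppender:

import java.io.{File, FileOutputStream, InputStream}

// Hypothetical illustration of the appender pattern used above: copy a process's
// output stream to a file on a daemon thread, and allow a clean stop().
class StreamToFileAppender(in: InputStream, file: File) {
  @volatile private var stopped = false
  private val thread = new Thread("stream-appender-" + file.getName) {
    override def run(): Unit = {
      val out = new FileOutputStream(file, true)  // append mode
      val buf = new Array[Byte](8192)
      try {
        var n = in.read(buf)
        while (n != -1 && !stopped) {
          out.write(buf, 0, n)
          n = in.read(buf)
        }
      } finally {
        out.close()
      }
    }
  }
  thread.setDaemon(true)
  thread.start()

  /** Stop appending and wait for the copy thread to finish. */
  def stop(): Unit = {
    stopped = true
    thread.join()
  }
}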
@@ -76,6 +80,13 @@ private[spark] class ExecutorRunner( if (process != null) { logInfo("Killing process!") process.destroy() + process.waitFor() + if (stdoutAppender != null) { + stdoutAppender.stop() + } + if (stderrAppender != null) { + stderrAppender.stop() + } val exitCode = process.waitFor() worker ! ExecutorStateChanged(appId, execId, state, message, Some(exitCode)) } @@ -137,11 +148,11 @@ private[spark] class ExecutorRunner( // Redirect its stdout and stderr to files val stdout = new File(executorDir, "stdout") - CommandUtils.redirectStream(process.getInputStream, stdout) + stdoutAppender = FileAppender(process.getInputStream, stdout, conf) val stderr = new File(executorDir, "stderr") Files.write(header, stderr, Charsets.UTF_8) - CommandUtils.redirectStream(process.getErrorStream, stderr) + stderrAppender = FileAppender(process.getErrorStream, stderr, conf) // Wait for it to exit; this is actually a bad thing if it happens, because we expect to run // long-lived processes only. However, in the future, we might restart the executor a few diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 100de26170a50..a0ecaf709f8e2 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -235,7 +235,7 @@ private[spark] class Worker( val manager = new ExecutorRunner(appId, execId, appDesc, cores_, memory_, self, workerId, host, appDesc.sparkHome.map(userSparkHome => new File(userSparkHome)).getOrElse(sparkHome), - workDir, akkaUrl, ExecutorState.RUNNING) + workDir, akkaUrl, conf, ExecutorState.RUNNING) executors(appId + "/" + execId) = manager manager.start() coresUsed += cores_ diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala index 8381f59672ea3..6a5ffb1b71bfb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala @@ -24,8 +24,10 @@ import scala.xml.Node import org.apache.spark.ui.{WebUIPage, UIUtils} import org.apache.spark.util.Utils +import org.apache.spark.Logging +import org.apache.spark.util.logging.{FileAppender, RollingFileAppender} -private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") { +private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with Logging { private val worker = parent.worker private val workDir = parent.workDir @@ -39,21 +41,18 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") { val offset = Option(request.getParameter("offset")).map(_.toLong) val byteLength = Option(request.getParameter("byteLength")).map(_.toInt).getOrElse(defaultBytes) - val path = (appId, executorId, driverId) match { + val logDir = (appId, executorId, driverId) match { case (Some(a), Some(e), None) => - s"${workDir.getPath}/$appId/$executorId/$logType" + s"${workDir.getPath}/$appId/$executorId/" case (None, None, Some(d)) => - s"${workDir.getPath}/$driverId/$logType" + s"${workDir.getPath}/$driverId/" case _ => throw new Exception("Request must specify either application or driver identifiers") } - val (startByte, endByte) = getByteRange(path, offset, byteLength) - val file = new File(path) - val logLength = file.length - - val pre = s"==== Bytes $startByte-$endByte of $logLength of $path ====\n" - pre + Utils.offsetBytes(path, 
startByte, endByte) + val (logText, startByte, endByte, logLength) = getLog(logDir, logType, offset, byteLength) + val pre = s"==== Bytes $startByte-$endByte of $logLength of $logDir$logType ====\n" + pre + logText } def render(request: HttpServletRequest): Seq[Node] = { @@ -65,19 +64,16 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") { val offset = Option(request.getParameter("offset")).map(_.toLong) val byteLength = Option(request.getParameter("byteLength")).map(_.toInt).getOrElse(defaultBytes) - val (path, params) = (appId, executorId, driverId) match { + val (logDir, params) = (appId, executorId, driverId) match { case (Some(a), Some(e), None) => - (s"${workDir.getPath}/$a/$e/$logType", s"appId=$a&executorId=$e") + (s"${workDir.getPath}/$a/$e/", s"appId=$a&executorId=$e") case (None, None, Some(d)) => - (s"${workDir.getPath}/$d/$logType", s"driverId=$d") + (s"${workDir.getPath}/$d/", s"driverId=$d") case _ => throw new Exception("Request must specify either application or driver identifiers") } - val (startByte, endByte) = getByteRange(path, offset, byteLength) - val file = new File(path) - val logLength = file.length - val logText = {Utils.offsetBytes(path, startByte, endByte)} + val (logText, startByte, endByte, logLength) = getLog(logDir, logType, offset, byteLength) val linkToMaster =
<p><a href={worker.activeMasterWebUiUrl}>Back to Master</a></p>

val range = Bytes {startByte.toString} - {endByte.toString} of {logLength} @@ -127,23 +123,37 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") { UIUtils.basicSparkPage(content, logType + " log page for " + appId) } - /** Determine the byte range for a log or log page. */ - private def getByteRange(path: String, offset: Option[Long], byteLength: Int): (Long, Long) = { - val defaultBytes = 100 * 1024 - val maxBytes = 1024 * 1024 - val file = new File(path) - val logLength = file.length() - val getOffset = offset.getOrElse(logLength - defaultBytes) - val startByte = - if (getOffset < 0) { - 0L - } else if (getOffset > logLength) { - logLength - } else { - getOffset + /** Get the part of the log files given the offset and desired length of bytes */ + private def getLog( + logDirectory: String, + logType: String, + offsetOption: Option[Long], + byteLength: Int + ): (String, Long, Long, Long) = { + try { + val files = RollingFileAppender.getSortedRolledOverFiles(logDirectory, logType) + logDebug(s"Sorted log files of type $logType in $logDirectory:\n${files.mkString("\n")}") + + val totalLength = files.map { _.length }.sum + val offset = offsetOption.getOrElse(totalLength - byteLength) + val startIndex = { + if (offset < 0) { + 0L + } else if (offset > totalLength) { + totalLength + } else { + offset + } } - val logPageLength = math.min(byteLength, maxBytes) - val endByte = math.min(startByte + logPageLength, logLength) - (startByte, endByte) + val endIndex = math.min(startIndex + totalLength, totalLength) + logDebug(s"Getting log from $startIndex to $endIndex") + val logText = Utils.offsetBytes(files, startIndex, endIndex) + logDebug(s"Got log of length ${logText.length} bytes") + (logText, startIndex, endIndex, totalLength) + } catch { + case e: Exception => + logError(s"Error getting $logType logs from directory $logDirectory", e) + ("Error getting logs due to exception: " + e.getMessage, 0, 0, 0) + } } } diff --git a/core/src/main/scala/org/apache/spark/network/Connection.scala b/core/src/main/scala/org/apache/spark/network/Connection.scala index 3ffaaab23d0f5..3b6298a26d7c5 100644 --- a/core/src/main/scala/org/apache/spark/network/Connection.scala +++ b/core/src/main/scala/org/apache/spark/network/Connection.scala @@ -210,7 +210,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, var nextMessageToBeUsed = 0 def addMessage(message: Message) { - messages.synchronized{ + messages.synchronized { /* messages += message */ messages.enqueue(message) logDebug("Added [" + message + "] to outbox for sending to " + @@ -223,7 +223,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, while (!messages.isEmpty) { /* nextMessageToBeUsed = nextMessageToBeUsed % messages.size */ /* val message = messages(nextMessageToBeUsed) */ - val message = messages.dequeue + val message = messages.dequeue() val chunk = message.getChunkForSending(defaultChunkSize) if (chunk.isDefined) { messages.enqueue(message) diff --git a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala index 5dd5fd0047c0d..cf1c985c2fff9 100644 --- a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala @@ -250,14 +250,14 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf, try { while(!selectorThread.isInterrupted) { while (! 
registerRequests.isEmpty) { - val conn: SendingConnection = registerRequests.dequeue + val conn: SendingConnection = registerRequests.dequeue() addListeners(conn) conn.connect() addConnection(conn) } while(!keyInterestChangeRequests.isEmpty) { - val (key, ops) = keyInterestChangeRequests.dequeue + val (key, ops) = keyInterestChangeRequests.dequeue() try { if (key.isValid) { @@ -532,9 +532,9 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf, } return } - var securityMsgResp = SecurityMessage.fromResponse(replyToken, + val securityMsgResp = SecurityMessage.fromResponse(replyToken, securityMsg.getConnectionId.toString()) - var message = securityMsgResp.toBufferMessage + val message = securityMsgResp.toBufferMessage if (message == null) throw new Exception("Error creating security message") sendSecurityMessage(waitingConn.getRemoteConnectionManagerId(), message) } catch { @@ -568,9 +568,9 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf, logDebug("Server sasl not completed: " + connection.connectionId) } if (replyToken != null) { - var securityMsgResp = SecurityMessage.fromResponse(replyToken, + val securityMsgResp = SecurityMessage.fromResponse(replyToken, securityMsg.getConnectionId) - var message = securityMsgResp.toBufferMessage + val message = securityMsgResp.toBufferMessage if (message == null) throw new Exception("Error creating security Message") sendSecurityMessage(connection.getRemoteConnectionManagerId(), message) } @@ -618,7 +618,7 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf, return true } } - return false + false } private def handleMessage( @@ -694,9 +694,9 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf, var firstResponse: Array[Byte] = null try { firstResponse = conn.sparkSaslClient.firstToken() - var securityMsg = SecurityMessage.fromResponse(firstResponse, + val securityMsg = SecurityMessage.fromResponse(firstResponse, conn.connectionId.toString()) - var message = securityMsg.toBufferMessage + val message = securityMsg.toBufferMessage if (message == null) throw new Exception("Error creating security message") connectionsAwaitingSasl += ((conn.connectionId, conn)) sendSecurityMessage(connManagerId, message) diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index 9ff76892aed32..5951865e56c9d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -27,6 +27,7 @@ import org.apache.spark.{Dependency, OneToOneDependency, ShuffleDependency} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.collection.{ExternalAppendOnlyMap, AppendOnlyMap} import org.apache.spark.serializer.Serializer +import org.apache.spark.shuffle.ShuffleHandle private[spark] sealed trait CoGroupSplitDep extends Serializable @@ -44,7 +45,7 @@ private[spark] case class NarrowCoGroupSplitDep( } } -private[spark] case class ShuffleCoGroupSplitDep(shuffleId: Int) extends CoGroupSplitDep +private[spark] case class ShuffleCoGroupSplitDep(handle: ShuffleHandle) extends CoGroupSplitDep private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]) extends Partition with Serializable { @@ -74,10 +75,11 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: private type CoGroupValue = (Any, Int) // Int is dependency number private type CoGroupCombiner = Seq[CoGroup] - private var serializer: 
Serializer = null + private var serializer: Option[Serializer] = None + /** Set a serializer for this RDD's shuffle, or null to use the default (spark.serializer) */ def setSerializer(serializer: Serializer): CoGroupedRDD[K] = { - this.serializer = serializer + this.serializer = Option(serializer) this } @@ -88,7 +90,7 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: new OneToOneDependency(rdd) } else { logDebug("Adding shuffle dependency with " + rdd) - new ShuffleDependency[Any, Any](rdd, part, serializer) + new ShuffleDependency[K, Any, CoGroupCombiner](rdd, part, serializer) } } } @@ -100,8 +102,8 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: array(i) = new CoGroupPartition(i, rdds.zipWithIndex.map { case (rdd, j) => // Assume each RDD contributed a single dependency, and get it dependencies(j) match { - case s: ShuffleDependency[_, _] => - new ShuffleCoGroupSplitDep(s.shuffleId) + case s: ShuffleDependency[_, _, _] => + new ShuffleCoGroupSplitDep(s.shuffleHandle) case _ => new NarrowCoGroupSplitDep(rdd, i, rdd.partitions(i)) } @@ -126,11 +128,11 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: val it = rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, Any]]] rddIterators += ((it, depNum)) - case ShuffleCoGroupSplitDep(shuffleId) => + case ShuffleCoGroupSplitDep(handle) => // Read map outputs of shuffle - val fetcher = SparkEnv.get.shuffleFetcher - val ser = Serializer.getSerializer(serializer) - val it = fetcher.fetch[Product2[K, Any]](shuffleId, split.index, context, ser) + val it = SparkEnv.get.shuffleManager + .getReader(handle, split.index, split.index + 1, context) + .read() rddIterators += ((it, depNum)) } diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 8909980957058..b6ad9b6c3e168 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -118,6 +118,56 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) combineByKey(createCombiner, mergeValue, mergeCombiners, new HashPartitioner(numPartitions)) } + /** + * Aggregate the values of each key, using given combine functions and a neutral "zero value". + * This function can return a different result type, U, than the type of the values in this RDD, + * V. Thus, we need one operation for merging a V into a U and one operation for merging two U's, + * as in scala.TraversableOnce. The former operation is used for merging values within a + * partition, and the latter is used for merging values between partitions. To avoid memory + * allocation, both of these functions are allowed to modify and return their first argument + * instead of creating a new U. 
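+ *
+ * A usage sketch (hypothetical example; assumes an existing SparkContext `sc`), computing a
+ * per-key (sum, count) pair whose parts can later be divided to get a per-key mean:
+ * {{{
+ *   val pairs = sc.parallelize(Seq(("a", 1), ("a", 2), ("b", 3)))
+ *   val sumCount = pairs.aggregateByKey((0, 0), new HashPartitioner(2))(
+ *     (acc, v) => (acc._1 + v, acc._2 + 1),
+ *     (a, b) => (a._1 + b._1, a._2 + b._2))
+ * }}}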
+ */ + def aggregateByKey[U: ClassTag](zeroValue: U, partitioner: Partitioner)(seqOp: (U, V) => U, + combOp: (U, U) => U): RDD[(K, U)] = { + // Serialize the zero value to a byte array so that we can get a new clone of it on each key + val zeroBuffer = SparkEnv.get.closureSerializer.newInstance().serialize(zeroValue) + val zeroArray = new Array[Byte](zeroBuffer.limit) + zeroBuffer.get(zeroArray) + + lazy val cachedSerializer = SparkEnv.get.closureSerializer.newInstance() + def createZero() = cachedSerializer.deserialize[U](ByteBuffer.wrap(zeroArray)) + + combineByKey[U]((v: V) => seqOp(createZero(), v), seqOp, combOp, partitioner) + } + + /** + * Aggregate the values of each key, using given combine functions and a neutral "zero value". + * This function can return a different result type, U, than the type of the values in this RDD, + * V. Thus, we need one operation for merging a V into a U and one operation for merging two U's, + * as in scala.TraversableOnce. The former operation is used for merging values within a + * partition, and the latter is used for merging values between partitions. To avoid memory + * allocation, both of these functions are allowed to modify and return their first argument + * instead of creating a new U. + */ + def aggregateByKey[U: ClassTag](zeroValue: U, numPartitions: Int)(seqOp: (U, V) => U, + combOp: (U, U) => U): RDD[(K, U)] = { + aggregateByKey(zeroValue, new HashPartitioner(numPartitions))(seqOp, combOp) + } + + /** + * Aggregate the values of each key, using given combine functions and a neutral "zero value". + * This function can return a different result type, U, than the type of the values in this RDD, + * V. Thus, we need one operation for merging a V into a U and one operation for merging two U's, + * as in scala.TraversableOnce. The former operation is used for merging values within a + * partition, and the latter is used for merging values between partitions. To avoid memory + * allocation, both of these functions are allowed to modify and return their first argument + * instead of creating a new U. + */ + def aggregateByKey[U: ClassTag](zeroValue: U)(seqOp: (U, V) => U, + combOp: (U, U) => U): RDD[(K, U)] = { + aggregateByKey(zeroValue, defaultPartitioner(self))(seqOp, combOp) + } + /** * Merge the values for each key using an associative function and a neutral "zero value" which * may be added to the result an arbitrary number of times, and must not change the result diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index fb12738f499f3..446f369c9ea16 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1190,7 +1190,7 @@ abstract class RDD[T: ClassTag]( /** User code that created this RDD (e.g. `textFile`, `parallelize`). 
*/ @transient private[spark] val creationSiteInfo = Utils.getCallSiteInfo - private[spark] def getCreationSite: String = creationSiteInfo.toString + private[spark] def getCreationSite: String = Option(creationSiteInfo).getOrElse("").toString private[spark] def elementClassTag: ClassTag[T] = classTag[T] diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala index 802b0bdfb2d59..bb108ef163c56 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala @@ -42,10 +42,11 @@ class ShuffledRDD[K, V, P <: Product2[K, V] : ClassTag]( part: Partitioner) extends RDD[P](prev.context, Nil) { - private var serializer: Serializer = null + private var serializer: Option[Serializer] = None + /** Set a serializer for this RDD's shuffle, or null to use the default (spark.serializer) */ def setSerializer(serializer: Serializer): ShuffledRDD[K, V, P] = { - this.serializer = serializer + this.serializer = Option(serializer) this } @@ -60,9 +61,10 @@ class ShuffledRDD[K, V, P <: Product2[K, V] : ClassTag]( } override def compute(split: Partition, context: TaskContext): Iterator[P] = { - val shuffledId = dependencies.head.asInstanceOf[ShuffleDependency[K, V]].shuffleId - val ser = Serializer.getSerializer(serializer) - SparkEnv.get.shuffleFetcher.fetch[P](shuffledId, split.index, context, ser) + val dep = dependencies.head.asInstanceOf[ShuffleDependency[K, V, V]] + SparkEnv.get.shuffleManager.getReader(dep.shuffleHandle, split.index, split.index + 1, context) + .read() + .asInstanceOf[Iterator[P]] } override def clearDependencies() { diff --git a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala index 9a09c05bbc959..ed24ea22a661c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala @@ -54,10 +54,11 @@ private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( part: Partitioner) extends RDD[(K, V)](rdd1.context, Nil) { - private var serializer: Serializer = null + private var serializer: Option[Serializer] = None + /** Set a serializer for this RDD's shuffle, or null to use the default (spark.serializer) */ def setSerializer(serializer: Serializer): SubtractedRDD[K, V, W] = { - this.serializer = serializer + this.serializer = Option(serializer) this } @@ -79,8 +80,8 @@ private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( // Each CoGroupPartition will depend on rdd1 and rdd2 array(i) = new CoGroupPartition(i, Seq(rdd1, rdd2).zipWithIndex.map { case (rdd, j) => dependencies(j) match { - case s: ShuffleDependency[_, _] => - new ShuffleCoGroupSplitDep(s.shuffleId) + case s: ShuffleDependency[_, _, _] => + new ShuffleCoGroupSplitDep(s.shuffleHandle) case _ => new NarrowCoGroupSplitDep(rdd, i, rdd.partitions(i)) } @@ -93,7 +94,6 @@ private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( override def compute(p: Partition, context: TaskContext): Iterator[(K, V)] = { val partition = p.asInstanceOf[CoGroupPartition] - val ser = Serializer.getSerializer(serializer) val map = new JHashMap[K, ArrayBuffer[V]] def getSeq(k: K): ArrayBuffer[V] = { val seq = map.get(k) @@ -109,9 +109,10 @@ private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( case NarrowCoGroupSplitDep(rdd, _, itsSplit) => rdd.iterator(itsSplit, 
context).asInstanceOf[Iterator[Product2[K, V]]].foreach(op) - case ShuffleCoGroupSplitDep(shuffleId) => - val iter = SparkEnv.get.shuffleFetcher.fetch[Product2[K, V]](shuffleId, partition.index, - context, ser) + case ShuffleCoGroupSplitDep(handle) => + val iter = SparkEnv.get.shuffleManager + .getReader(handle, partition.index, partition.index + 1, context) + .read() iter.foreach(op) } // the first dep is rdd1; add all values to the map diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index e09a4221e8315..3c85b5a2ae776 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -190,7 +190,7 @@ class DAGScheduler( * The jobId value passed in will be used if the stage doesn't already exist with * a lower jobId (jobId always increases across jobs.) */ - private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], jobId: Int): Stage = { + private def getShuffleMapStage(shuffleDep: ShuffleDependency[_, _, _], jobId: Int): Stage = { shuffleToMapStage.get(shuffleDep.shuffleId) match { case Some(stage) => stage case None => @@ -210,7 +210,7 @@ class DAGScheduler( private def newStage( rdd: RDD[_], numTasks: Int, - shuffleDep: Option[ShuffleDependency[_,_]], + shuffleDep: Option[ShuffleDependency[_, _, _]], jobId: Int, callSite: Option[String] = None) : Stage = @@ -233,7 +233,7 @@ class DAGScheduler( private def newOrUsedStage( rdd: RDD[_], numTasks: Int, - shuffleDep: ShuffleDependency[_,_], + shuffleDep: ShuffleDependency[_, _, _], jobId: Int, callSite: Option[String] = None) : Stage = @@ -269,7 +269,7 @@ class DAGScheduler( // we can't do it in its constructor because # of partitions is unknown for (dep <- r.dependencies) { dep match { - case shufDep: ShuffleDependency[_,_] => + case shufDep: ShuffleDependency[_, _, _] => parents += getShuffleMapStage(shufDep, jobId) case _ => visit(dep.rdd) @@ -290,7 +290,7 @@ class DAGScheduler( if (getCacheLocs(rdd).contains(Nil)) { for (dep <- rdd.dependencies) { dep match { - case shufDep: ShuffleDependency[_,_] => + case shufDep: ShuffleDependency[_, _, _] => val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { missing += mapStage @@ -1088,7 +1088,7 @@ class DAGScheduler( visitedRdds += rdd for (dep <- rdd.dependencies) { dep match { - case shufDep: ShuffleDependency[_,_] => + case shufDep: ShuffleDependency[_, _, _] => val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { visitedStages += mapStage diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index ed0f56f1abdf5..0098b5a59d1a5 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -29,6 +29,7 @@ import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.rdd.{RDD, RDDCheckpointData} import org.apache.spark.serializer.Serializer import org.apache.spark.storage._ +import org.apache.spark.shuffle.ShuffleWriter private[spark] object ShuffleMapTask { @@ -37,7 +38,7 @@ private[spark] object ShuffleMapTask { // expensive on the master node if it needs to launch thousands of tasks. 
private val serializedInfoCache = new HashMap[Int, Array[Byte]] - def serializeInfo(stageId: Int, rdd: RDD[_], dep: ShuffleDependency[_,_]): Array[Byte] = { + def serializeInfo(stageId: Int, rdd: RDD[_], dep: ShuffleDependency[_, _, _]): Array[Byte] = { synchronized { val old = serializedInfoCache.get(stageId).orNull if (old != null) { @@ -56,12 +57,12 @@ private[spark] object ShuffleMapTask { } } - def deserializeInfo(stageId: Int, bytes: Array[Byte]): (RDD[_], ShuffleDependency[_,_]) = { + def deserializeInfo(stageId: Int, bytes: Array[Byte]): (RDD[_], ShuffleDependency[_, _, _]) = { val in = new GZIPInputStream(new ByteArrayInputStream(bytes)) val ser = SparkEnv.get.closureSerializer.newInstance() val objIn = ser.deserializeStream(in) val rdd = objIn.readObject().asInstanceOf[RDD[_]] - val dep = objIn.readObject().asInstanceOf[ShuffleDependency[_,_]] + val dep = objIn.readObject().asInstanceOf[ShuffleDependency[_, _, _]] (rdd, dep) } @@ -99,7 +100,7 @@ private[spark] object ShuffleMapTask { private[spark] class ShuffleMapTask( stageId: Int, var rdd: RDD[_], - var dep: ShuffleDependency[_,_], + var dep: ShuffleDependency[_, _, _], _partitionId: Int, @transient private var locs: Seq[TaskLocation]) extends Task[MapStatus](stageId, _partitionId) @@ -141,66 +142,22 @@ private[spark] class ShuffleMapTask( } override def runTask(context: TaskContext): MapStatus = { - val numOutputSplits = dep.partitioner.numPartitions metrics = Some(context.taskMetrics) - - val blockManager = SparkEnv.get.blockManager - val shuffleBlockManager = blockManager.shuffleBlockManager - var shuffle: ShuffleWriterGroup = null - var success = false - + var writer: ShuffleWriter[Any, Any] = null try { - // Obtain all the block writers for shuffle blocks. - val ser = Serializer.getSerializer(dep.serializer) - shuffle = shuffleBlockManager.forMapTask(dep.shuffleId, partitionId, numOutputSplits, ser) - - // Write the map output to its associated buckets. + val manager = SparkEnv.get.shuffleManager + writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context) for (elem <- rdd.iterator(split, context)) { - val pair = elem.asInstanceOf[Product2[Any, Any]] - val bucketId = dep.partitioner.getPartition(pair._1) - shuffle.writers(bucketId).write(pair) - } - - // Commit the writes. Get the size of each bucket block (total block size). - var totalBytes = 0L - var totalTime = 0L - val compressedSizes: Array[Byte] = shuffle.writers.map { writer: BlockObjectWriter => - writer.commit() - writer.close() - val size = writer.fileSegment().length - totalBytes += size - totalTime += writer.timeWriting() - MapOutputTracker.compressSize(size) + writer.write(elem.asInstanceOf[Product2[Any, Any]]) } - - // Update shuffle metrics. - val shuffleMetrics = new ShuffleWriteMetrics - shuffleMetrics.shuffleBytesWritten = totalBytes - shuffleMetrics.shuffleWriteTime = totalTime - metrics.get.shuffleWriteMetrics = Some(shuffleMetrics) - - success = true - new MapStatus(blockManager.blockManagerId, compressedSizes) - } catch { case e: Exception => - // If there is an exception from running the task, revert the partial writes - // and throw the exception upstream to Spark. - if (shuffle != null && shuffle.writers != null) { - for (writer <- shuffle.writers) { - writer.revertPartialWrites() - writer.close() + return writer.stop(success = true).get + } catch { + case e: Exception => + if (writer != null) { + writer.stop(success = false) } - } - throw e + throw e } finally { - // Release the writers back to the shuffle block manager. 
- if (shuffle != null && shuffle.writers != null) { - try { - shuffle.releaseWriters(success) - } catch { - case e: Exception => logError("Failed to release shuffle writers", e) - } - } - // Execute the callbacks on task completion. context.executeOnCompleteCallbacks() } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala index 5c1fc30e4a557..3bf9713f728c6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala @@ -40,7 +40,7 @@ private[spark] class Stage( val id: Int, val rdd: RDD[_], val numTasks: Int, - val shuffleDep: Option[ShuffleDependency[_,_]], // Output shuffle if stage is a map stage + val shuffleDep: Option[ShuffleDependency[_, _, _]], // Output shuffle if stage is a map stage val parents: List[Stage], val jobId: Int, callSite: Option[String]) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 99d305b36a959..df59f444b7a0e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -71,7 +71,8 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul val loader = Thread.currentThread.getContextClassLoader taskSetManager.abort("ClassNotFound with classloader: " + loader) case ex: Exception => - taskSetManager.abort("Exception while deserializing and fetching task: %s".format(ex)) + logError("Exception while getting task result", ex) + taskSetManager.abort("Exception while getting task result: %s".format(ex)) } } }) diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index ee26970a3d874..f2f5cea469c61 100644 --- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -52,6 +52,10 @@ object Serializer { def getSerializer(serializer: Serializer): Serializer = { if (serializer == null) SparkEnv.get.serializer else serializer } + + def getSerializer(serializer: Option[Serializer]): Serializer = { + serializer.getOrElse(SparkEnv.get.serializer) + } } diff --git a/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/shuffle/BaseShuffleHandle.scala similarity index 66% rename from core/src/main/scala/org/apache/spark/ShuffleFetcher.scala rename to core/src/main/scala/org/apache/spark/shuffle/BaseShuffleHandle.scala index a4f69b6b22b2c..b36c457d6d514 100644 --- a/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/BaseShuffleHandle.scala @@ -15,22 +15,16 @@ * limitations under the License. */ -package org.apache.spark +package org.apache.spark.shuffle +import org.apache.spark.{ShuffleDependency, Aggregator, Partitioner} import org.apache.spark.serializer.Serializer -private[spark] abstract class ShuffleFetcher { - - /** - * Fetch the shuffle outputs for a given ShuffleDependency. - * @return An iterator over the elements of the fetched shuffle outputs. 
- */ - def fetch[T]( - shuffleId: Int, - reduceId: Int, - context: TaskContext, - serializer: Serializer = SparkEnv.get.serializer): Iterator[T] - - /** Stop the fetcher */ - def stop() {} -} +/** + * A basic ShuffleHandle implementation that just captures registerShuffle's parameters. + */ +private[spark] class BaseShuffleHandle[K, V, C]( + shuffleId: Int, + val numMaps: Int, + val dependency: ShuffleDependency[K, V, C]) + extends ShuffleHandle(shuffleId) diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleHandle.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleHandle.scala new file mode 100644 index 0000000000000..13c7115f88afa --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleHandle.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle + +/** + * An opaque handle to a shuffle, used by a ShuffleManager to pass information about it to tasks. + * + * @param shuffleId ID of the shuffle + */ +private[spark] abstract class ShuffleHandle(val shuffleId: Int) extends Serializable {} diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala new file mode 100644 index 0000000000000..9c859b8b4a118 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle + +import org.apache.spark.{TaskContext, ShuffleDependency} + +/** + * Pluggable interface for shuffle systems. A ShuffleManager is created in SparkEnv on both the + * driver and executors, based on the spark.shuffle.manager setting. The driver registers shuffles + * with it, and executors (or tasks running locally in the driver) can ask to read and write data. + * + * NOTE: this will be instantiated by SparkEnv so its constructor can take a SparkConf and + * boolean isDriver as parameters. 
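+ *
+ * A configuration sketch selecting an implementation (the class name below is hypothetical;
+ * the default registered in SparkEnv is org.apache.spark.shuffle.hash.HashShuffleManager):
+ * {{{
+ *   val conf = new SparkConf().set("spark.shuffle.manager", "org.example.MyShuffleManager")
+ * }}}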
+ */ +private[spark] trait ShuffleManager { + /** + * Register a shuffle with the manager and obtain a handle for it to pass to tasks. + */ + def registerShuffle[K, V, C]( + shuffleId: Int, + numMaps: Int, + dependency: ShuffleDependency[K, V, C]): ShuffleHandle + + /** Get a writer for a given partition. Called on executors by map tasks. */ + def getWriter[K, V](handle: ShuffleHandle, mapId: Int, context: TaskContext): ShuffleWriter[K, V] + + /** + * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive). + * Called on executors by reduce tasks. + */ + def getReader[K, C]( + handle: ShuffleHandle, + startPartition: Int, + endPartition: Int, + context: TaskContext): ShuffleReader[K, C] + + /** Remove a shuffle's metadata from the ShuffleManager. */ + def unregisterShuffle(shuffleId: Int) + + /** Shut down this ShuffleManager. */ + def stop(): Unit +} diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleReader.scala new file mode 100644 index 0000000000000..b30e366d06006 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleReader.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle + +/** + * Obtained inside a reduce task to read combined records from the mappers. + */ +private[spark] trait ShuffleReader[K, C] { + /** Read the combined key-values for this reduce task */ + def read(): Iterator[Product2[K, C]] + + /** Close this reader */ + def stop(): Unit +} diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala new file mode 100644 index 0000000000000..ead3ebd652ca5 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.shuffle + +import org.apache.spark.scheduler.MapStatus + +/** + * Obtained inside a map task to write out records to the shuffle system. + */ +private[spark] trait ShuffleWriter[K, V] { + /** Write a record to this task's output */ + def write(record: Product2[K, V]): Unit + + /** Close this writer, passing along whether the map completed */ + def stop(success: Boolean): Option[MapStatus] +} diff --git a/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala similarity index 96% rename from core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala rename to core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala index a67392441ed29..b05b6ea345df3 100644 --- a/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark +package org.apache.spark.shuffle.hash import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap @@ -24,17 +24,16 @@ import org.apache.spark.executor.ShuffleReadMetrics import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockId} import org.apache.spark.util.CompletionIterator +import org.apache.spark._ -private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging { - - override def fetch[T]( +private[hash] object BlockStoreShuffleFetcher extends Logging { + def fetch[T]( shuffleId: Int, reduceId: Int, context: TaskContext, serializer: Serializer) : Iterator[T] = { - logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId)) val blockManager = SparkEnv.get.blockManager diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala new file mode 100644 index 0000000000000..5b0940ecce29d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.hash + +import org.apache.spark._ +import org.apache.spark.shuffle._ + +/** + * A ShuffleManager using hashing, that creates one output file per reduce partition on each + * mapper (possibly reusing these across waves of tasks). + */ +class HashShuffleManager(conf: SparkConf) extends ShuffleManager { + /* Register a shuffle with the manager and obtain a handle for it to pass to tasks. 
*/ + override def registerShuffle[K, V, C]( + shuffleId: Int, + numMaps: Int, + dependency: ShuffleDependency[K, V, C]): ShuffleHandle = { + new BaseShuffleHandle(shuffleId, numMaps, dependency) + } + + /** + * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive). + * Called on executors by reduce tasks. + */ + override def getReader[K, C]( + handle: ShuffleHandle, + startPartition: Int, + endPartition: Int, + context: TaskContext): ShuffleReader[K, C] = { + new HashShuffleReader( + handle.asInstanceOf[BaseShuffleHandle[K, _, C]], startPartition, endPartition, context) + } + + /** Get a writer for a given partition. Called on executors by map tasks. */ + override def getWriter[K, V](handle: ShuffleHandle, mapId: Int, context: TaskContext) + : ShuffleWriter[K, V] = { + new HashShuffleWriter(handle.asInstanceOf[BaseShuffleHandle[K, V, _]], mapId, context) + } + + /** Remove a shuffle's metadata from the ShuffleManager. */ + override def unregisterShuffle(shuffleId: Int): Unit = {} + + /** Shut down this ShuffleManager. */ + override def stop(): Unit = {} +} diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala new file mode 100644 index 0000000000000..f6a790309a587 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.hash + +import org.apache.spark.serializer.Serializer +import org.apache.spark.shuffle.{BaseShuffleHandle, ShuffleReader} +import org.apache.spark.TaskContext + +class HashShuffleReader[K, C]( + handle: BaseShuffleHandle[K, _, C], + startPartition: Int, + endPartition: Int, + context: TaskContext) + extends ShuffleReader[K, C] +{ + require(endPartition == startPartition + 1, + "Hash shuffle currently only supports fetching one partition") + + /** Read the combined key-values for this reduce task */ + override def read(): Iterator[Product2[K, C]] = { + BlockStoreShuffleFetcher.fetch(handle.shuffleId, startPartition, context, + Serializer.getSerializer(handle.dependency.serializer)) + } + + /** Close this reader */ + override def stop(): Unit = ??? +} diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala new file mode 100644 index 0000000000000..4c6749098c110 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
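For orientation, a minimal sketch of how the scheduler is expected to drive the pluggable shuffle API introduced above (driver registers, map tasks write, reduce tasks read). This is illustrative only: `dep`, `context` and `records` are assumed to already exist, the numMaps/mapId values are made up, and the code is placed under org.apache.spark so the private[spark] types are visible.

package org.apache.spark.shuffle.hash

import org.apache.spark.{ShuffleDependency, SparkConf, TaskContext}

object ShuffleApiSketch {
  def sketch[K, V, C](
      dep: ShuffleDependency[K, V, C],
      context: TaskContext,
      records: Iterator[Product2[K, V]]): Unit = {
    val manager = new HashShuffleManager(new SparkConf)

    // Driver side: register the shuffle once; the opaque handle is shipped to tasks.
    val handle = manager.registerShuffle(dep.shuffleId, 4 /* numMaps */, dep)

    // Map task: write this task's records and report a MapStatus on success.
    val writer = manager.getWriter[K, V](handle, 0 /* mapId */, context)
    records.foreach(writer.write)
    val status = writer.stop(success = true)

    // Reduce task: read exactly one partition (the hash shuffle's current limit).
    val reader = manager.getReader[K, C](handle, 0, 1, context)
    val combined: Iterator[Product2[K, C]] = reader.read()
  }
}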
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.hash + +import org.apache.spark.shuffle.{BaseShuffleHandle, ShuffleWriter} +import org.apache.spark.{Logging, MapOutputTracker, SparkEnv, TaskContext} +import org.apache.spark.storage.{BlockObjectWriter} +import org.apache.spark.serializer.Serializer +import org.apache.spark.executor.ShuffleWriteMetrics +import org.apache.spark.scheduler.MapStatus + +class HashShuffleWriter[K, V]( + handle: BaseShuffleHandle[K, V, _], + mapId: Int, + context: TaskContext) + extends ShuffleWriter[K, V] with Logging { + + private val dep = handle.dependency + private val numOutputSplits = dep.partitioner.numPartitions + private val metrics = context.taskMetrics + private var stopping = false + + private val blockManager = SparkEnv.get.blockManager + private val shuffleBlockManager = blockManager.shuffleBlockManager + private val ser = Serializer.getSerializer(dep.serializer.getOrElse(null)) + private val shuffle = shuffleBlockManager.forMapTask(dep.shuffleId, mapId, numOutputSplits, ser) + + /** Write a record to this task's output */ + override def write(record: Product2[K, V]): Unit = { + val pair = record.asInstanceOf[Product2[Any, Any]] + val bucketId = dep.partitioner.getPartition(pair._1) + shuffle.writers(bucketId).write(pair) + } + + /** Close this writer, passing along whether the map completed */ + override def stop(success: Boolean): Option[MapStatus] = { + try { + if (stopping) { + return None + } + stopping = true + if (success) { + try { + return Some(commitWritesAndBuildStatus()) + } catch { + case e: Exception => + revertWrites() + throw e + } + } else { + revertWrites() + return None + } + } finally { + // Release the writers back to the shuffle block manager. + if (shuffle != null && shuffle.writers != null) { + try { + shuffle.releaseWriters(success) + } catch { + case e: Exception => logError("Failed to release shuffle writers", e) + } + } + } + } + + private def commitWritesAndBuildStatus(): MapStatus = { + // Commit the writes. Get the size of each bucket block (total block size). + var totalBytes = 0L + var totalTime = 0L + val compressedSizes = shuffle.writers.map { writer: BlockObjectWriter => + writer.commit() + writer.close() + val size = writer.fileSegment().length + totalBytes += size + totalTime += writer.timeWriting() + MapOutputTracker.compressSize(size) + } + + // Update shuffle metrics. 
+ val shuffleMetrics = new ShuffleWriteMetrics + shuffleMetrics.shuffleBytesWritten = totalBytes + shuffleMetrics.shuffleWriteTime = totalTime + metrics.shuffleWriteMetrics = Some(shuffleMetrics) + + new MapStatus(blockManager.blockManagerId, compressedSizes) + } + + private def revertWrites(): Unit = { + if (shuffle != null && shuffle.writers != null) { + for (writer <- shuffle.writers) { + writer.revertPartialWrites() + writer.close() + } + } + } +} diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 3b1b6df089b8e..4ce28bb0cf059 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -862,6 +862,59 @@ private[spark] object Utils extends Logging { Source.fromBytes(buff).mkString } + /** + * Return a string containing data across a set of files. The `startIndex` + * and `endIndex` are based on the cumulative size of all the files taken in + * the given order. See figure below for more details. + */ + def offsetBytes(files: Seq[File], start: Long, end: Long): String = { + val fileLengths = files.map { _.length } + val startIndex = math.max(start, 0) + val endIndex = math.min(end, fileLengths.sum) + val fileToLength = files.zip(fileLengths).toMap + logDebug("Log files: \n" + fileToLength.mkString("\n")) + + val stringBuffer = new StringBuffer((endIndex - startIndex).toInt) + var sum = 0L + for (file <- files) { + val startIndexOfFile = sum + val endIndexOfFile = sum + fileToLength(file) + logDebug(s"Processing file $file, " + + s"with start index = $startIndexOfFile, end index = $endIndex") + + /* + ____________ + range 1: | | + | case A | + + files: |==== file 1 ====|====== file 2 ======|===== file 3 =====| + + | case B . case C . case D | + range 2: |___________.____________________.______________| + */ + + if (startIndex <= startIndexOfFile && endIndex >= endIndexOfFile) { + // Case C: read the whole file + stringBuffer.append(offsetBytes(file.getAbsolutePath, 0, fileToLength(file))) + } else if (startIndex > startIndexOfFile && startIndex < endIndexOfFile) { + // Case A and B: read from [start of required range] to [end of file / end of range] + val effectiveStartIndex = startIndex - startIndexOfFile + val effectiveEndIndex = math.min(endIndex - startIndexOfFile, fileToLength(file)) + stringBuffer.append(Utils.offsetBytes( + file.getAbsolutePath, effectiveStartIndex, effectiveEndIndex)) + } else if (endIndex > startIndexOfFile && endIndex < endIndexOfFile) { + // Case D: read from [start of file] to [end of required range] + val effectiveStartIndex = math.max(startIndex - startIndexOfFile, 0) + val effectiveEndIndex = endIndex - startIndexOfFile + stringBuffer.append(Utils.offsetBytes( + file.getAbsolutePath, effectiveStartIndex, effectiveEndIndex)) + } + sum += fileToLength(file) + logDebug(s"After processing file $file, string built is ${stringBuffer.toString}") + } + stringBuffer.toString + } + /** * Clone an object using a Spark serializer. */ diff --git a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala new file mode 100644 index 0000000000000..8e9c3036d09c2 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
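To make the case analysis above concrete, a small sketch using Guava's Files/Charsets (as the test suites in this patch do). The file names are made up, and the code is assumed to live under org.apache.spark so the private[spark] Utils object is visible.

package org.apache.spark.util

import java.io.File
import com.google.common.base.Charsets
import com.google.common.io.Files

object OffsetBytesSketch {
  def main(args: Array[String]): Unit = {
    // Three 10-byte files laid end to end cover cumulative offsets [0, 30).
    val dir = Files.createTempDir()
    val files = Seq("f1", "f2", "f3").map(name => new File(dir, name))
    Files.write("0123456789", files(0), Charsets.UTF_8)
    Files.write("abcdefghij", files(1), Charsets.UTF_8)
    Files.write("ABCDEFGHIJ", files(2), Charsets.UTF_8)

    // Offsets 8 to 12 straddle the boundary: the tail of file 1 plus the head of file 2.
    assert(Utils.offsetBytes(files, 8, 12) == "89ab")
  }
}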
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util.logging + +import java.io.{File, FileOutputStream, InputStream} + +import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.util.{IntParam, Utils} + +/** + * Continuously appends the data from an input stream into the given file. + */ +private[spark] class FileAppender(inputStream: InputStream, file: File, bufferSize: Int = 8192) + extends Logging { + @volatile private var outputStream: FileOutputStream = null + @volatile private var markedForStop = false // has the appender been asked to stop + @volatile private var stopped = false // has the appender stopped + + // Thread that reads the input stream and writes to file + private val writingThread = new Thread("File appending thread for " + file) { + setDaemon(true) + override def run() { + Utils.logUncaughtExceptions { + appendStreamToFile() + } + } + } + writingThread.start() + + /** + * Wait for the appender to stop appending, either because the input stream is closed + * or because of an error while appending + */ + def awaitTermination() { + synchronized { + if (!stopped) { + wait() + } + } + } + + /** Stop the appender */ + def stop() { + markedForStop = true + } + + /** Continuously read chunks from the input stream and append to the file */ + protected def appendStreamToFile() { + try { + logDebug("Started appending thread") + openFile() + val buf = new Array[Byte](bufferSize) + var n = 0 + while (!markedForStop && n != -1) { + n = inputStream.read(buf) + if (n != -1) { + appendToFile(buf, n) + } + } + } catch { + case e: Exception => + logError(s"Error writing stream to file $file", e) + } finally { + closeFile() + synchronized { + stopped = true + notifyAll() + } + } + } + + /** Append bytes to the file output stream */ + protected def appendToFile(bytes: Array[Byte], len: Int) { + if (outputStream == null) { + openFile() + } + outputStream.write(bytes, 0, len) + } + + /** Open the file output stream */ + protected def openFile() { + outputStream = new FileOutputStream(file, false) + logDebug(s"Opened file $file") + } + + /** Close the file output stream */ + protected def closeFile() { + outputStream.flush() + outputStream.close() + logDebug(s"Closed file $file") + } +} + +/** + * Companion object to [[org.apache.spark.util.logging.FileAppender]] which has helper + * functions to choose the correct type of FileAppender based on SparkConf configuration.
+ */ +private[spark] object FileAppender extends Logging { + + /** Create the right appender based on Spark configuration */ + def apply(inputStream: InputStream, file: File, conf: SparkConf): FileAppender = { + + import RollingFileAppender._ + + val rollingStrategy = conf.get(STRATEGY_PROPERTY, STRATEGY_DEFAULT) + val rollingSizeBytes = conf.get(SIZE_PROPERTY, SIZE_DEFAULT) + val rollingInterval = conf.get(INTERVAL_PROPERTY, INTERVAL_DEFAULT) + + def createTimeBasedAppender() = { + val validatedParams: Option[(Long, String)] = rollingInterval match { + case "daily" => + logInfo(s"Rolling executor logs enabled for $file with daily rolling") + Some(24 * 60 * 60 * 1000L, "--YYYY-MM-dd") + case "hourly" => + logInfo(s"Rolling executor logs enabled for $file with hourly rolling") + Some(60 * 60 * 1000L, "--YYYY-MM-dd--HH") + case "minutely" => + logInfo(s"Rolling executor logs enabled for $file with rolling every minute") + Some(60 * 1000L, "--YYYY-MM-dd--HH-mm") + case IntParam(seconds) => + logInfo(s"Rolling executor logs enabled for $file with rolling every $seconds seconds") + Some(seconds * 1000L, "--YYYY-MM-dd--HH-mm-ss") + case _ => + logWarning(s"Illegal interval for rolling executor logs [$rollingInterval], " + + s"rolling logs not enabled") + None + } + validatedParams.map { + case (interval, pattern) => + new RollingFileAppender( + inputStream, file, new TimeBasedRollingPolicy(interval, pattern), conf) + }.getOrElse { + new FileAppender(inputStream, file) + } + } + + def createSizeBasedAppender() = { + rollingSizeBytes match { + case IntParam(bytes) => + logInfo(s"Rolling executor logs enabled for $file with rolling every $bytes bytes") + new RollingFileAppender(inputStream, file, new SizeBasedRollingPolicy(bytes), conf) + case _ => + logWarning( + s"Illegal size [$rollingSizeBytes] for rolling executor logs, rolling logs not enabled") + new FileAppender(inputStream, file) + } + } + + rollingStrategy match { + case "" => + new FileAppender(inputStream, file) + case "time" => + createTimeBasedAppender() + case "size" => + createSizeBasedAppender() + case _ => + logWarning( + s"Illegal strategy [$rollingStrategy] for rolling executor logs, " + + s"rolling logs not enabled") + new FileAppender(inputStream, file) + } + } +} + + diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala new file mode 100644 index 0000000000000..1bbbd20cf076f --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
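A short sketch of how the factory above is meant to be driven by configuration, mirroring the "file appender selection" test added later in this patch. The stream and file are placeholders, and the snippet assumes it runs inside a test or other code with access to the private[spark] classes.

import java.io.{File, PipedInputStream, PipedOutputStream}
import org.apache.spark.SparkConf
import org.apache.spark.util.logging.{FileAppender, RollingFileAppender}

val conf = new SparkConf()
  .set("spark.executor.logs.rolling.strategy", "size")          // "", "time" or "size"
  .set("spark.executor.logs.rolling.size.maxBytes", "1048576")  // roll at roughly 1 MB

// With the size strategy set, apply() should return a RollingFileAppender;
// with no strategy at all it falls back to the plain FileAppender.
val in = new PipedInputStream(new PipedOutputStream())
val appender = FileAppender(in, new File("stdout"), conf)
assert(appender.isInstanceOf[RollingFileAppender])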
+ */ + +package org.apache.spark.util.logging + +import java.io.{File, FileFilter, InputStream} + +import org.apache.commons.io.FileUtils +import org.apache.spark.SparkConf +import RollingFileAppender._ + +/** + * Continuously appends data from input stream into the given file, and rolls + * over the file based on the given rolling policy. The rolled over files are named + * based on the given pattern. + * + * @param inputStream Input stream to read data from + * @param activeFile File to write data to + * @param rollingPolicy Policy based on which files will be rolled over. + * @param conf SparkConf that is used to pass on extra configurations + * @param bufferSize Optional buffer size. Used mainly for testing. + */ +private[spark] class RollingFileAppender( + inputStream: InputStream, + activeFile: File, + val rollingPolicy: RollingPolicy, + conf: SparkConf, + bufferSize: Int = DEFAULT_BUFFER_SIZE + ) extends FileAppender(inputStream, activeFile, bufferSize) { + + private val maxRetainedFiles = conf.getInt(RETAINED_FILES_PROPERTY, -1) + + /** Stop the appender */ + override def stop() { + super.stop() + } + + /** Roll over the file if necessary, then append the bytes to the file */ + override protected def appendToFile(bytes: Array[Byte], len: Int) { + if (rollingPolicy.shouldRollover(len)) { + rollover() + rollingPolicy.rolledOver() + } + super.appendToFile(bytes, len) + rollingPolicy.bytesWritten(len) + } + + /** Roll over the file by closing the output stream and moving it over */ + private def rollover() { + try { + closeFile() + moveFile() + openFile() + if (maxRetainedFiles > 0) { + deleteOldFiles() + } + } catch { + case e: Exception => + logError(s"Error rolling over $activeFile", e) + } + } + + /** Move the active log file to a new rollover file */ + private def moveFile() { + val rolloverSuffix = rollingPolicy.generateRolledOverFileSuffix() + val rolloverFile = new File( + activeFile.getParentFile, activeFile.getName + rolloverSuffix).getAbsoluteFile + try { + logDebug(s"Attempting to rollover file $activeFile to file $rolloverFile") + if (activeFile.exists) { + if (!rolloverFile.exists) { + FileUtils.moveFile(activeFile, rolloverFile) + logInfo(s"Rolled over $activeFile to $rolloverFile") + } else { + // In case the rollover file name clashes, make a unique file name. + // The resultant file names are long and ugly, so this is used only + // if there is a name collision. This can be avoided by using + // the right pattern such that name collisions do not occur.
+ var i = 0 + var altRolloverFile: File = null + do { + altRolloverFile = new File(activeFile.getParent, + s"${activeFile.getName}$rolloverSuffix--$i").getAbsoluteFile + i += 1 + } while (i < 10000 && altRolloverFile.exists) + + logWarning(s"Rollover file $rolloverFile already exists, " + + s"rolled over $activeFile to file $altRolloverFile") + FileUtils.moveFile(activeFile, altRolloverFile) + } + } else { + logWarning(s"File $activeFile does not exist") + } + } + } + + /** Retain only the last few rolled over files */ + private[util] def deleteOldFiles() { + try { + val rolledoverFiles = activeFile.getParentFile.listFiles(new FileFilter { + def accept(f: File): Boolean = { + f.getName.startsWith(activeFile.getName) && f != activeFile + } + }).sorted + val filesToBeDeleted = rolledoverFiles.take( + math.max(0, rolledoverFiles.size - maxRetainedFiles)) + filesToBeDeleted.foreach { file => + logInfo(s"Deleting executor log file ${file.getAbsolutePath}") + file.delete() + } + } catch { + case e: Exception => + logError("Error cleaning logs in directory " + activeFile.getParentFile.getAbsolutePath, e) + } + } +} + +/** + * Companion object to [[org.apache.spark.util.logging.RollingFileAppender]]. Defines + * names of configurations that configure rolling file appenders. + */ +private[spark] object RollingFileAppender { + val STRATEGY_PROPERTY = "spark.executor.logs.rolling.strategy" + val STRATEGY_DEFAULT = "" + val INTERVAL_PROPERTY = "spark.executor.logs.rolling.time.interval" + val INTERVAL_DEFAULT = "daily" + val SIZE_PROPERTY = "spark.executor.logs.rolling.size.maxBytes" + val SIZE_DEFAULT = (1024 * 1024).toString + val RETAINED_FILES_PROPERTY = "spark.executor.logs.rolling.maxRetainedFiles" + val DEFAULT_BUFFER_SIZE = 8192 + + /** + * Get the sorted list of rolled over files. This assumes that all the rolled + * over file names are prefixed with the `activeFileName`, and the active file + * name has the latest logs. So it sorts all the rolled over logs (that are + * prefixed with `activeFileName`) and appends the active file at the end. + */ + def getSortedRolledOverFiles(directory: String, activeFileName: String): Seq[File] = { + val rolledOverFiles = new File(directory).getAbsoluteFile.listFiles.filter { file => + val fileName = file.getName + fileName.startsWith(activeFileName) && fileName != activeFileName + }.sorted + val activeFile = { + val file = new File(directory, activeFileName).getAbsoluteFile + if (file.exists) Some(file) else None + } + rolledOverFiles ++ activeFile + } +} diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala new file mode 100644 index 0000000000000..84e5c3c917dcb --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
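One way these helpers are meant to compose (a sketch only; the directory and file name are hypothetical, and both helpers are private[spark]): the rolled-over files plus the active file form one logical log, which can be sliced with the multi-file Utils.offsetBytes added earlier in this patch.

import org.apache.spark.util.Utils
import org.apache.spark.util.logging.RollingFileAppender

// Rolled-over files come back sorted, with the active file (latest data) last.
val logFiles = RollingFileAppender.getSortedRolledOverFiles("/tmp/executor-logs", "stdout")
// Treat the sequence as one contiguous byte range and read its first kilobyte.
val firstKb = Utils.offsetBytes(logFiles, 0, 1024)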
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util.logging + +import java.text.SimpleDateFormat +import java.util.Calendar + +import org.apache.spark.Logging + +/** + * Defines the policy based on which [[org.apache.spark.util.logging.RollingFileAppender]] will + * generate rolling files. + */ +private[spark] trait RollingPolicy { + + /** Whether rollover should be initiated at this moment */ + def shouldRollover(bytesToBeWritten: Long): Boolean + + /** Notify that rollover has occurred */ + def rolledOver() + + /** Notify that bytes have been written */ + def bytesWritten(bytes: Long) + + /** Get the desired name of the rollover file */ + def generateRolledOverFileSuffix(): String +} + +/** + * Defines a [[org.apache.spark.util.logging.RollingPolicy]] by which files will be rolled + * over at a fixed interval. + */ +private[spark] class TimeBasedRollingPolicy( + var rolloverIntervalMillis: Long, + rollingFileSuffixPattern: String, + checkIntervalConstraint: Boolean = true // set to false while testing + ) extends RollingPolicy with Logging { + + import TimeBasedRollingPolicy._ + if (checkIntervalConstraint && rolloverIntervalMillis < MINIMUM_INTERVAL_SECONDS * 1000L) { + logWarning(s"Rolling interval [${rolloverIntervalMillis/1000L} seconds] is too small. " + + s"Setting the interval to the acceptable minimum of $MINIMUM_INTERVAL_SECONDS seconds.") + rolloverIntervalMillis = MINIMUM_INTERVAL_SECONDS * 1000L + } + + @volatile private var nextRolloverTime = calculateNextRolloverTime() + private val formatter = new SimpleDateFormat(rollingFileSuffixPattern) + + /** Should rollover if current time has exceeded next rollover time */ + def shouldRollover(bytesToBeWritten: Long): Boolean = { + System.currentTimeMillis > nextRolloverTime + } + + /** Rollover has occurred, so find the next time to rollover */ + def rolledOver() { + nextRolloverTime = calculateNextRolloverTime() + logDebug(s"Current time: ${System.currentTimeMillis}, next rollover time: " + nextRolloverTime) + } + + def bytesWritten(bytes: Long) { } // nothing to do + + private def calculateNextRolloverTime(): Long = { + val now = System.currentTimeMillis() + val targetTime = ( + math.ceil(now.toDouble / rolloverIntervalMillis) * rolloverIntervalMillis + ).toLong + logDebug(s"Next rollover time is $targetTime") + targetTime + } + + def generateRolledOverFileSuffix(): String = { + formatter.format(Calendar.getInstance.getTime) + } +} + +private[spark] object TimeBasedRollingPolicy { + val MINIMUM_INTERVAL_SECONDS = 60L // 1 minute +} + +/** + * Defines a [[org.apache.spark.util.logging.RollingPolicy]] by which files will be rolled + * over after reaching a particular size. + */ +private[spark] class SizeBasedRollingPolicy( + var rolloverSizeBytes: Long, + checkSizeConstraint: Boolean = true // set to false while testing + ) extends RollingPolicy with Logging { + + import SizeBasedRollingPolicy._ + if (checkSizeConstraint && rolloverSizeBytes < MINIMUM_SIZE_BYTES) { + logWarning(s"Rolling size [$rolloverSizeBytes bytes] is too small. 
" + + s"Setting the size to the acceptable minimum of $MINIMUM_SIZE_BYTES bytes.") + rolloverSizeBytes = MINIMUM_SIZE_BYTES + } + + @volatile private var bytesWrittenSinceRollover = 0L + val formatter = new SimpleDateFormat("--YYYY-MM-dd--HH-mm-ss--SSSS") + + /** Should rollover if the next set of bytes is going to exceed the size limit */ + def shouldRollover(bytesToBeWritten: Long): Boolean = { + logInfo(s"$bytesToBeWritten + $bytesWrittenSinceRollover > $rolloverSizeBytes") + bytesToBeWritten + bytesWrittenSinceRollover > rolloverSizeBytes + } + + /** Rollover has occurred, so reset the counter */ + def rolledOver() { + bytesWrittenSinceRollover = 0 + } + + /** Increment the bytes that have been written in the current file */ + def bytesWritten(bytes: Long) { + bytesWrittenSinceRollover += bytes + } + + /** Get the desired name of the rollover file */ + def generateRolledOverFileSuffix(): String = { + formatter.format(Calendar.getInstance.getTime) + } +} + +private[spark] object SizeBasedRollingPolicy { + val MINIMUM_SIZE_BYTES = RollingFileAppender.DEFAULT_BUFFER_SIZE * 10 +} + diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index 50a62129116f1..ef41bfb88de9d 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -317,6 +317,37 @@ public Integer call(Integer a, Integer b) { Assert.assertEquals(33, sum); } + @Test + public void aggregateByKey() { + JavaPairRDD pairs = sc.parallelizePairs( + Arrays.asList( + new Tuple2(1, 1), + new Tuple2(1, 1), + new Tuple2(3, 2), + new Tuple2(5, 1), + new Tuple2(5, 3)), 2); + + Map> sets = pairs.aggregateByKey(new HashSet(), + new Function2, Integer, Set>() { + @Override + public Set call(Set a, Integer b) { + a.add(b); + return a; + } + }, + new Function2, Set, Set>() { + @Override + public Set call(Set a, Set b) { + a.addAll(b); + return a; + } + }).collectAsMap(); + Assert.assertEquals(3, sets.size()); + Assert.assertEquals(new HashSet(Arrays.asList(1)), sets.get(1)); + Assert.assertEquals(new HashSet(Arrays.asList(2)), sets.get(3)); + Assert.assertEquals(new HashSet(Arrays.asList(1, 3)), sets.get(5)); + } + @SuppressWarnings("unchecked") @Test public void foldByKey() { diff --git a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala index 4e7c34e6d1ada..3aab88e9e9196 100644 --- a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala @@ -20,11 +20,11 @@ package org.apache.spark import scala.collection.mutable import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.SparkContext._ -class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkContext { +class AccumulatorSuite extends FunSuite with Matchers with LocalSparkContext { implicit def setAccum[A] = new AccumulableParam[mutable.Set[A], A] { diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index dc2db66df60e0..13b415cccb647 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -201,7 +201,7 @@ class ContextCleanerSuite extends FunSuite with BeforeAndAfter with LocalSparkCo def newPairRDD = newRDD.map(_ -> 1) def newShuffleRDD = 
newPairRDD.reduceByKey(_ + _) def newBroadcast = sc.broadcast(1 to 100) - def newRDDWithShuffleDependencies: (RDD[_], Seq[ShuffleDependency[_, _]]) = { + def newRDDWithShuffleDependencies: (RDD[_], Seq[ShuffleDependency[_, _, _]]) = { def getAllDependencies(rdd: RDD[_]): Seq[Dependency[_]] = { rdd.dependencies ++ rdd.dependencies.flatMap { dep => getAllDependencies(dep.rdd) @@ -211,8 +211,8 @@ class ContextCleanerSuite extends FunSuite with BeforeAndAfter with LocalSparkCo // Get all the shuffle dependencies val shuffleDeps = getAllDependencies(rdd) - .filter(_.isInstanceOf[ShuffleDependency[_, _]]) - .map(_.asInstanceOf[ShuffleDependency[_, _]]) + .filter(_.isInstanceOf[ShuffleDependency[_, _, _]]) + .map(_.asInstanceOf[ShuffleDependency[_, _, _]]) (rdd, shuffleDeps) } diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index 14ddd6f1ec08f..41c294f727b3c 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark import org.scalatest.BeforeAndAfter import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts._ -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.scalatest.time.{Millis, Span} import org.apache.spark.SparkContext._ @@ -31,7 +31,7 @@ class NotSerializableClass class NotSerializableExn(val notSer: NotSerializableClass) extends Throwable() {} -class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter +class DistributedSuite extends FunSuite with Matchers with BeforeAndAfter with LocalSparkContext { val clusterUrl = "local-cluster[2,1,512]" diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala index 2c8ef405c944c..a57430e829ced 100644 --- a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala +++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala @@ -25,7 +25,7 @@ import scala.concurrent.duration._ import scala.concurrent.future import org.scalatest.{BeforeAndAfter, FunSuite} -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.SparkContext._ import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} @@ -35,7 +35,7 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} * (e.g. count) as well as multi-job action (e.g. take). We test the local and cluster schedulers * in both FIFO and fair scheduling modes. 
*/ -class JobCancellationSuite extends FunSuite with ShouldMatchers with BeforeAndAfter +class JobCancellationSuite extends FunSuite with Matchers with BeforeAndAfter with LocalSparkContext { override def afterEach() { diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index be6508a40ea61..47112ce66d695 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.SparkContext._ import org.apache.spark.ShuffleSuite.NonJavaSerializableClass @@ -26,7 +26,7 @@ import org.apache.spark.rdd.{CoGroupedRDD, OrderedRDDFunctions, RDD, ShuffledRDD import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.MutablePair -class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { +class ShuffleSuite extends FunSuite with Matchers with LocalSparkContext { val conf = new SparkConf(loadDefaults = false) @@ -58,7 +58,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { // default Java serializer cannot handle the non serializable class. val c = new ShuffledRDD[Int, NonJavaSerializableClass, (Int, NonJavaSerializableClass)]( b, new HashPartitioner(NUM_BLOCKS)).setSerializer(new KryoSerializer(conf)) - val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId + val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]].shuffleId assert(c.count === 10) @@ -97,7 +97,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10)) .setSerializer(new KryoSerializer(conf)) - val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId + val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]].shuffleId assert(c.count === 4) val blockSizes = (0 until NUM_BLOCKS).flatMap { id => @@ -122,7 +122,7 @@ class ShuffleSuite extends FunSuite with ShouldMatchers with LocalSparkContext { // NOTE: The default Java serializer should create zero-sized blocks val c = new ShuffledRDD[Int, Int, (Int, Int)](b, new HashPartitioner(10)) - val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[Int, Int]].shuffleId + val shuffleId = c.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]].shuffleId assert(c.count === 4) val blockSizes = (0 until NUM_BLOCKS).flatMap { id => diff --git a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala index d6b93f5fedd3b..4161aede1d1d0 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala @@ -18,9 +18,9 @@ package org.apache.spark.deploy import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers -class ClientSuite extends FunSuite with ShouldMatchers { +class ClientSuite extends FunSuite with Matchers { test("correctly validates driver jar URL's") { ClientArguments.isValidJarUrl("http://someHost:8080/foo.jar") should be (true) ClientArguments.isValidJarUrl("file://some/path/to/a/jarFile.jar") should be (true) diff --git a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala 
b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala index bfae32dae0dc5..01ab2d549325c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala @@ -28,6 +28,7 @@ import org.scalatest.FunSuite import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, RecoveryState, WorkerInfo} import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} +import org.apache.spark.SparkConf class JsonProtocolSuite extends FunSuite { @@ -116,7 +117,8 @@ class JsonProtocolSuite extends FunSuite { } def createExecutorRunner(): ExecutorRunner = { new ExecutorRunner("appId", 123, createAppDesc(), 4, 1234, null, "workerId", "host", - new File("sparkHome"), new File("workDir"), "akka://worker", ExecutorState.RUNNING) + new File("sparkHome"), new File("workDir"), "akka://worker", + new SparkConf, ExecutorState.RUNNING) } def createDriverRunner(): DriverRunner = { new DriverRunner("driverId", new File("workDir"), new File("sparkHome"), createDriverDesc(), diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 02427a4a83506..565c53e9529ff 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -25,9 +25,9 @@ import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkException, Test import org.apache.spark.deploy.SparkSubmit._ import org.apache.spark.util.Utils import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers -class SparkSubmitSuite extends FunSuite with ShouldMatchers { +class SparkSubmitSuite extends FunSuite with Matchers { def beforeAll() { System.setProperty("spark.testing", "true") } diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala index 8ae387fa0be6f..e5f748d55500d 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala @@ -22,6 +22,7 @@ import java.io.File import org.scalatest.FunSuite import org.apache.spark.deploy.{ApplicationDescription, Command, ExecutorState} +import org.apache.spark.SparkConf class ExecutorRunnerTest extends FunSuite { test("command includes appId") { @@ -32,7 +33,7 @@ class ExecutorRunnerTest extends FunSuite { sparkHome, "appUiUrl") val appId = "12345-worker321-9876" val er = new ExecutorRunner(appId, 1, appDesc, 8, 500, null, "blah", "worker321", f(sparkHome.getOrElse(".")), - f("ooga"), "blah", ExecutorState.RUNNING) + f("ooga"), "blah", new SparkConf, ExecutorState.RUNNING) assert(er.getCommandSeq.last === appId) } diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 9ddafc451878d..0b9004448a63e 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -30,6 +30,19 @@ import org.apache.spark.SparkContext._ import org.apache.spark.{Partitioner, SharedSparkContext} class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { + test("aggregateByKey") { + val pairs = 
sc.parallelize(Array((1, 1), (1, 1), (3, 2), (5, 1), (5, 3)), 2) + + val sets = pairs.aggregateByKey(new HashSet[Int]())(_ += _, _ ++= _).collect() + assert(sets.size === 3) + val valuesFor1 = sets.find(_._1 == 1).get._2 + assert(valuesFor1.toList.sorted === List(1)) + val valuesFor3 = sets.find(_._1 == 3).get._2 + assert(valuesFor3.toList.sorted === List(2)) + val valuesFor5 = sets.find(_._1 == 5).get._2 + assert(valuesFor5.toList.sorted === List(1, 3)) + } + test("groupByKey") { val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1))) val groups = pairs.groupByKey().collect() diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 2e70a59c2f53e..e94a1e76d410c 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -24,6 +24,7 @@ import org.scalatest.FunSuite import org.apache.spark._ import org.apache.spark.SparkContext._ +import org.apache.spark.util.Utils class RDDSuite extends FunSuite with SharedSparkContext { @@ -65,6 +66,13 @@ class RDDSuite extends FunSuite with SharedSparkContext { } } + test("serialization") { + val empty = new EmptyRDD[Int](sc) + val serial = Utils.serialize(empty) + val deserial: EmptyRDD[Int] = Utils.deserialize(serial) + assert(!deserial.toString().isEmpty()) + } + test("countApproxDistinct") { def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble diff --git a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala index d0619559bb457..656917628f7a8 100644 --- a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala @@ -18,12 +18,12 @@ package org.apache.spark.rdd import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.{Logging, SharedSparkContext} import org.apache.spark.SparkContext._ -class SortingSuite extends FunSuite with SharedSparkContext with ShouldMatchers with Logging { +class SortingSuite extends FunSuite with SharedSparkContext with Matchers with Logging { test("sortByKey") { val pairs = sc.parallelize(Array((1, 0), (2, 0), (0, 0), (3, 0)), 2) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index 5426e578a9ddd..be506e0287a16 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -22,13 +22,13 @@ import java.util.concurrent.Semaphore import scala.collection.mutable import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.{LocalSparkContext, SparkContext} import org.apache.spark.SparkContext._ import org.apache.spark.executor.TaskMetrics -class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatchers +class SparkListenerSuite extends FunSuite with LocalSparkContext with Matchers with BeforeAndAfter with BeforeAndAfterAll { /** Length of time to wait while draining listener events. 
*/ diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 81bd8257bc155..d7dbe5164b7f6 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -28,7 +28,7 @@ import org.mockito.Mockito.{mock, when} import org.scalatest.{BeforeAndAfter, FunSuite, PrivateMethodTester} import org.scalatest.concurrent.Eventually._ import org.scalatest.concurrent.Timeouts._ -import org.scalatest.matchers.ShouldMatchers._ +import org.scalatest.Matchers import org.scalatest.time.SpanSugar._ import org.apache.spark.{MapOutputTrackerMaster, SecurityManager, SparkConf} @@ -39,7 +39,8 @@ import org.apache.spark.util.{AkkaUtils, ByteBufferInputStream, SizeEstimator, U import scala.language.implicitConversions import scala.language.postfixOps -class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodTester { +class BlockManagerSuite extends FunSuite with Matchers with BeforeAndAfter + with PrivateMethodTester { private val conf = new SparkConf(false) var store: BlockManager = null var store2: BlockManager = null diff --git a/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala index 8c06a2d9aa4ab..91b4c7b0dd962 100644 --- a/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala @@ -18,14 +18,14 @@ package org.apache.spark.ui.jobs import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.{LocalSparkContext, SparkConf, Success} import org.apache.spark.executor.{ShuffleReadMetrics, TaskMetrics} import org.apache.spark.scheduler._ import org.apache.spark.util.Utils -class JobProgressListenerSuite extends FunSuite with LocalSparkContext with ShouldMatchers { +class JobProgressListenerSuite extends FunSuite with LocalSparkContext with Matchers { test("test LRU eviction of stages") { val conf = new SparkConf() conf.set("spark.ui.retainedStages", 5.toString) diff --git a/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala b/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala index 63642461e4465..090d48ec921a1 100644 --- a/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/DistributionSuite.scala @@ -18,13 +18,13 @@ package org.apache.spark.util import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers /** * */ -class DistributionSuite extends FunSuite with ShouldMatchers { +class DistributionSuite extends FunSuite with Matchers { test("summary") { val d = new Distribution((1 to 100).toArray.map{_.toDouble}) val stats = d.statCounter diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala new file mode 100644 index 0000000000000..53d7f5c6072e6 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io._ + +import scala.collection.mutable.HashSet +import scala.reflect._ + +import org.apache.commons.io.{FileUtils, IOUtils} +import org.apache.spark.{Logging, SparkConf} +import org.scalatest.{BeforeAndAfter, FunSuite} +import org.apache.spark.util.logging.{RollingFileAppender, SizeBasedRollingPolicy, TimeBasedRollingPolicy, FileAppender} + +class FileAppenderSuite extends FunSuite with BeforeAndAfter with Logging { + + val testFile = new File("FileAppenderSuite-test-" + System.currentTimeMillis).getAbsoluteFile + + before { + cleanup() + } + + after { + cleanup() + } + + test("basic file appender") { + val testString = (1 to 1000).mkString(", ") + val inputStream = IOUtils.toInputStream(testString) + val appender = new FileAppender(inputStream, testFile) + inputStream.close() + appender.awaitTermination() + assert(FileUtils.readFileToString(testFile) === testString) + } + + test("rolling file appender - time-based rolling") { + // setup input stream and appender + val testOutputStream = new PipedOutputStream() + val testInputStream = new PipedInputStream(testOutputStream, 100 * 1000) + val rolloverIntervalMillis = 100 + val durationMillis = 1000 + val numRollovers = durationMillis / rolloverIntervalMillis + val textToAppend = (1 to numRollovers).map( _.toString * 10 ) + + val appender = new RollingFileAppender(testInputStream, testFile, + new TimeBasedRollingPolicy(rolloverIntervalMillis, s"--HH-mm-ss-SSSS", false), + new SparkConf(), 10) + + testRolling(appender, testOutputStream, textToAppend, rolloverIntervalMillis) + } + + test("rolling file appender - size-based rolling") { + // setup input stream and appender + val testOutputStream = new PipedOutputStream() + val testInputStream = new PipedInputStream(testOutputStream, 100 * 1000) + val rolloverSize = 1000 + val textToAppend = (1 to 3).map( _.toString * 1000 ) + + val appender = new RollingFileAppender(testInputStream, testFile, + new SizeBasedRollingPolicy(rolloverSize, false), new SparkConf(), 99) + + val files = testRolling(appender, testOutputStream, textToAppend, 0) + files.foreach { file => + logInfo(file.toString + ": " + file.length + " bytes") + assert(file.length <= rolloverSize) + } + } + + test("rolling file appender - cleaning") { + // setup input stream and appender + val testOutputStream = new PipedOutputStream() + val testInputStream = new PipedInputStream(testOutputStream, 100 * 1000) + val conf = new SparkConf().set(RollingFileAppender.RETAINED_FILES_PROPERTY, "10") + val appender = new RollingFileAppender(testInputStream, testFile, + new SizeBasedRollingPolicy(1000, false), conf, 10) + + // send data to appender through the input stream, and wait for the data to be written + val allGeneratedFiles = new HashSet[String]() + val items = (1 to 10).map { _.toString * 10000 } + for (i <- 0 until items.size) { + testOutputStream.write(items(i).getBytes("UTF8")) + testOutputStream.flush() + 
allGeneratedFiles ++= RollingFileAppender.getSortedRolledOverFiles( + testFile.getParentFile.toString, testFile.getName).map(_.toString) + + Thread.sleep(10) + } + testOutputStream.close() + appender.awaitTermination() + logInfo("Appender closed") + + // verify whether the earliest file has been deleted + val rolledOverFiles = allGeneratedFiles.filter { _ != testFile.toString }.toArray.sorted + logInfo(s"All rolled over files generated:${rolledOverFiles.size}\n" + rolledOverFiles.mkString("\n")) + assert(rolledOverFiles.size > 2) + val earliestRolledOverFile = rolledOverFiles.head + val existingRolledOverFiles = RollingFileAppender.getSortedRolledOverFiles( + testFile.getParentFile.toString, testFile.getName).map(_.toString) + logInfo("Existing rolled over files:\n" + existingRolledOverFiles.mkString("\n")) + assert(!existingRolledOverFiles.toSet.contains(earliestRolledOverFile)) + } + + test("file appender selection") { + // Test whether FileAppender.apply() returns the right type of the FileAppender based + // on SparkConf settings. + + def testAppenderSelection[ExpectedAppender: ClassTag, ExpectedRollingPolicy]( + properties: Seq[(String, String)], expectedRollingPolicyParam: Long = -1): FileAppender = { + + // Set spark conf properties + val conf = new SparkConf + properties.foreach { p => + conf.set(p._1, p._2) + } + + // Create and test file appender + val inputStream = new PipedInputStream(new PipedOutputStream()) + val appender = FileAppender(inputStream, new File("stdout"), conf) + assert(appender.isInstanceOf[ExpectedAppender]) + assert(appender.getClass.getSimpleName === + classTag[ExpectedAppender].runtimeClass.getSimpleName) + if (appender.isInstanceOf[RollingFileAppender]) { + val rollingPolicy = appender.asInstanceOf[RollingFileAppender].rollingPolicy + rollingPolicy.isInstanceOf[ExpectedRollingPolicy] + val policyParam = if (rollingPolicy.isInstanceOf[TimeBasedRollingPolicy]) { + rollingPolicy.asInstanceOf[TimeBasedRollingPolicy].rolloverIntervalMillis + } else { + rollingPolicy.asInstanceOf[SizeBasedRollingPolicy].rolloverSizeBytes + } + assert(policyParam === expectedRollingPolicyParam) + } + appender + } + + import RollingFileAppender._ + + def rollingStrategy(strategy: String) = Seq(STRATEGY_PROPERTY -> strategy) + def rollingSize(size: String) = Seq(SIZE_PROPERTY -> size) + def rollingInterval(interval: String) = Seq(INTERVAL_PROPERTY -> interval) + + val msInDay = 24 * 60 * 60 * 1000L + val msInHour = 60 * 60 * 1000L + val msInMinute = 60 * 1000L + + // test no strategy -> no rolling + testAppenderSelection[FileAppender, Any](Seq.empty) + + // test time based rolling strategy + testAppenderSelection[RollingFileAppender, Any](rollingStrategy("time"), msInDay) + testAppenderSelection[RollingFileAppender, TimeBasedRollingPolicy]( + rollingStrategy("time") ++ rollingInterval("daily"), msInDay) + testAppenderSelection[RollingFileAppender, TimeBasedRollingPolicy]( + rollingStrategy("time") ++ rollingInterval("hourly"), msInHour) + testAppenderSelection[RollingFileAppender, TimeBasedRollingPolicy]( + rollingStrategy("time") ++ rollingInterval("minutely"), msInMinute) + testAppenderSelection[RollingFileAppender, TimeBasedRollingPolicy]( + rollingStrategy("time") ++ rollingInterval("123456789"), 123456789 * 1000L) + testAppenderSelection[FileAppender, Any]( + rollingStrategy("time") ++ rollingInterval("xyz")) + + // test size based rolling strategy + testAppenderSelection[RollingFileAppender, SizeBasedRollingPolicy]( + rollingStrategy("size") ++ rollingSize("123456789"), 
123456789) + testAppenderSelection[FileAppender, Any](rollingSize("xyz")) + + // test illegal strategy + testAppenderSelection[FileAppender, Any](rollingStrategy("xyz")) + } + + /** + * Run the rolling file appender with data and see whether all the data was written correctly + * across rolled over files. + */ + def testRolling( + appender: FileAppender, + outputStream: OutputStream, + textToAppend: Seq[String], + sleepTimeBetweenTexts: Long + ): Seq[File] = { + // send data to appender through the input stream, and wait for the data to be written + val expectedText = textToAppend.mkString("") + for (i <- 0 until textToAppend.size) { + outputStream.write(textToAppend(i).getBytes("UTF8")) + outputStream.flush() + Thread.sleep(sleepTimeBetweenTexts) + } + logInfo("Data sent to appender") + outputStream.close() + appender.awaitTermination() + logInfo("Appender closed") + + // verify whether all the data written to rolled over files is same as expected + val generatedFiles = RollingFileAppender.getSortedRolledOverFiles( + testFile.getParentFile.toString, testFile.getName) + logInfo("Filtered files: \n" + generatedFiles.mkString("\n")) + assert(generatedFiles.size > 1) + val allText = generatedFiles.map { file => + FileUtils.readFileToString(file) + }.mkString("") + assert(allText === expectedText) + generatedFiles + } + + /** Delete all the generated rolledover files */ + def cleanup() { + testFile.getParentFile.listFiles.filter { file => + file.getName.startsWith(testFile.getName) + }.foreach { _.delete() } + } +} diff --git a/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala index 32d74d0500b72..cf438a3d72a06 100644 --- a/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala @@ -22,9 +22,9 @@ import java.util.NoSuchElementException import scala.collection.mutable.Buffer import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers -class NextIteratorSuite extends FunSuite with ShouldMatchers { +class NextIteratorSuite extends FunSuite with Matchers { test("one iteration") { val i = new StubIterator(Buffer(1)) i.hasNext should be === true diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 0aad882ed76a8..1ee936bc78f49 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -140,6 +140,38 @@ class UtilsSuite extends FunSuite { Utils.deleteRecursively(tmpDir2) } + test("reading offset bytes across multiple files") { + val tmpDir = Files.createTempDir() + tmpDir.deleteOnExit() + val files = (1 to 3).map(i => new File(tmpDir, i.toString)) + Files.write("0123456789", files(0), Charsets.UTF_8) + Files.write("abcdefghij", files(1), Charsets.UTF_8) + Files.write("ABCDEFGHIJ", files(2), Charsets.UTF_8) + + // Read first few bytes in the 1st file + assert(Utils.offsetBytes(files, 0, 5) === "01234") + + // Read bytes within the 1st file + assert(Utils.offsetBytes(files, 5, 8) === "567") + + // Read bytes across 1st and 2nd file + assert(Utils.offsetBytes(files, 8, 18) === "89abcdefgh") + + // Read bytes across 1st, 2nd and 3rd file + assert(Utils.offsetBytes(files, 5, 24) === "56789abcdefghijABCD") + + // Read some nonexistent bytes in the beginning + assert(Utils.offsetBytes(files, -5, 18) === "0123456789abcdefgh") + + // 
Read some nonexistent bytes at the end + assert(Utils.offsetBytes(files, 18, 35) === "ijABCDEFGHIJ") + + // Read some nonexistent bytes on both ends + assert(Utils.offsetBytes(files, -5, 35) === "0123456789abcdefghijABCDEFGHIJ") + + Utils.deleteRecursively(tmpDir) + } + test("deserialize long value") { val testval : Long = 9730889947L val bbuf = ByteBuffer.allocate(8) diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala index b024c89d94d33..6a70877356409 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala @@ -20,11 +20,11 @@ package org.apache.spark.util.collection import scala.collection.mutable.HashSet import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.util.SizeEstimator -class OpenHashMapSuite extends FunSuite with ShouldMatchers { +class OpenHashMapSuite extends FunSuite with Matchers { test("size for specialized, primitive value (int)") { val capacity = 1024 diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala index ff4a98f5dcd4a..68a03e3a0970f 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala @@ -18,11 +18,11 @@ package org.apache.spark.util.collection import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.util.SizeEstimator -class OpenHashSetSuite extends FunSuite with ShouldMatchers { +class OpenHashSetSuite extends FunSuite with Matchers { test("size for specialized, primitive int") { val loadFactor = 0.7 diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala index e3fca173908e9..8c7df7d73dcd3 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala @@ -20,11 +20,11 @@ package org.apache.spark.util.collection import scala.collection.mutable.HashSet import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.util.SizeEstimator -class PrimitiveKeyOpenHashMapSuite extends FunSuite with ShouldMatchers { +class PrimitiveKeyOpenHashMapSuite extends FunSuite with Matchers { test("size for specialized, primitive key, value (int, int)") { val capacity = 1024 diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala index 0865c6386f7cd..e15fd59a5a8bb 100644 --- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala @@ -18,13 +18,13 @@ package org.apache.spark.util.random import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.util.Utils.times import scala.language.reflectiveCalls -class XORShiftRandomSuite extends FunSuite with ShouldMatchers { +class 
XORShiftRandomSuite extends FunSuite with Matchers { def fixture = new { val seed = 1L diff --git a/dev/mima b/dev/mima index ab6bd4469b0e8..b68800d6d0173 100755 --- a/dev/mima +++ b/dev/mima @@ -23,6 +23,9 @@ set -o pipefail FWDIR="$(cd `dirname $0`/..; pwd)" cd $FWDIR +echo -e "q\n" | sbt/sbt oldDeps/update + +export SPARK_CLASSPATH=`find lib_managed \( -name '*spark*jar' -a -type f \) -printf "%p:" ` ./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore echo -e "q\n" | sbt/sbt mima-report-binary-issues | grep -v -e "info.*Resolving" ret_val=$? @@ -31,5 +34,5 @@ if [ $ret_val != 0 ]; then echo "NOTE: Exceptions to binary compatibility can be added in project/MimaExcludes.scala" fi -rm -f .generated-mima-excludes +rm -f .generated-mima* exit $ret_val diff --git a/dev/run-tests b/dev/run-tests index 93d6692f83ca8..c82a47ebb618b 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -73,9 +73,6 @@ fi echo "=========================================================================" echo "Running PySpark tests" echo "=========================================================================" -if [ -z "$PYSPARK_PYTHON" ]; then - export PYSPARK_PYTHON=/usr/local/bin/python2.7 -fi ./python/run-tests echo "=========================================================================" diff --git a/docs/configuration.md b/docs/configuration.md index 71fafa573467f..b84104cc7e653 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -784,6 +784,45 @@ Apart from these, the following properties are also available, and may be useful higher memory usage in Spark. + + spark.executor.logs.rolling.strategy + (none) + + Set the strategy of rolling of executor logs. By default it is disabled. It can + be set to "time" (time-based rolling) or "size" (size-based rolling). For "time", + use spark.executor.logs.rolling.time.interval to set the rolling interval. + For "size", use spark.executor.logs.rolling.size.maxBytes to set + the maximum file size for rolling. + + + + spark.executor.logs.rolling.time.interval + daily + + Set the time interval by which the executor logs will be rolled over. + Rolling is disabled by default. Valid values are `daily`, `hourly`, `minutely` or + any interval in seconds. See spark.executor.logs.rolling.maxRetainedFiles + for automatic cleaning of old logs. + + + + spark.executor.logs.rolling.size.maxBytes + (none) + + Set the max size of the file by which the executor logs will be rolled over. + Rolling is disabled by default. Value is set in terms of bytes. + See spark.executor.logs.rolling.maxRetainedFiles + for automatic cleaning of old logs. + + + + spark.executor.logs.rolling.maxRetainedFiles + (none) + + Sets the number of latest rolling log files that are going to be retained by the system. + Older log files will be deleted. Disabled by default. + + #### Cluster Managers diff --git a/docs/programming-guide.md b/docs/programming-guide.md index 7989e02dfb732..79784682bfd1b 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -890,6 +890,10 @@ for details. reduceByKey(func, [numTasks]) When called on a dataset of (K, V) pairs, returns a dataset of (K, V) pairs where the values for each key are aggregated using the given reduce function. Like in groupByKey, the number of reduce tasks is configurable through an optional second argument. 
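As an editorial aside (not part of the patch): the new spark.executor.logs.rolling.* properties documented in docs/configuration.md above combine as follows; the hourly interval and retention count are arbitrary example values.

import org.apache.spark.SparkConf

// Roll executor logs on a time basis, once per hour, and keep only the three
// most recent rolled-over files; older ones are deleted automatically.
val conf = new SparkConf()
  .set("spark.executor.logs.rolling.strategy", "time")
  .set("spark.executor.logs.rolling.time.interval", "hourly")
  .set("spark.executor.logs.rolling.maxRetainedFiles", "3")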
+ + aggregateByKey(zeroValue)(seqOp, combOp, [numTasks]) + When called on a dataset of (K, V) pairs, returns a dataset of (K, U) pairs where the values for each key are aggregated using the given combine functions and a neutral "zero" value. Allows an aggregated value type that is different than the input value type, while avoiding unnecessary allocations. Like in groupByKey, the number of reduce tasks is configurable through an optional second argument. + sortByKey([ascending], [numTasks]) When called on a dataset of (K, V) pairs where K implements Ordered, returns a dataset of (K, V) pairs sorted by keys in ascending or descending order, as specified in the boolean ascending argument. diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 9d5748ba4bc23..52a89cb2481ca 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -200,6 +200,7 @@ def get_spark_shark_version(opts): sys.exit(1) return (version, spark_shark_map[version]) + # Attempt to resolve an appropriate AMI given the architecture and # region of the request. def get_spark_ami(opts): @@ -418,6 +419,16 @@ def launch_cluster(conn, opts, cluster_name): master_nodes = master_res.instances print "Launched master in %s, regid = %s" % (zone, master_res.id) + # Give the instances descriptive names + for master in master_nodes: + master.add_tag( + key='Name', + value='spark-{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id)) + for slave in slave_nodes: + slave.add_tag( + key='Name', + value='spark-{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id)) + # Return all the instances return (master_nodes, slave_nodes) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala index 6eb41e7ba36fb..28e201d279f41 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala @@ -50,6 +50,8 @@ object MovieLensALS { numIterations: Int = 20, lambda: Double = 1.0, rank: Int = 10, + numUserBlocks: Int = -1, + numProductBlocks: Int = -1, implicitPrefs: Boolean = false) def main(args: Array[String]) { @@ -67,8 +69,14 @@ object MovieLensALS { .text(s"lambda (smoothing constant), default: ${defaultParams.lambda}") .action((x, c) => c.copy(lambda = x)) opt[Unit]("kryo") - .text(s"use Kryo serialization") + .text("use Kryo serialization") .action((_, c) => c.copy(kryo = true)) + opt[Int]("numUserBlocks") + .text(s"number of user blocks, default: ${defaultParams.numUserBlocks} (auto)") + .action((x, c) => c.copy(numUserBlocks = x)) + opt[Int]("numProductBlocks") + .text(s"number of product blocks, default: ${defaultParams.numProductBlocks} (auto)") + .action((x, c) => c.copy(numProductBlocks = x)) opt[Unit]("implicitPrefs") .text("use implicit preference") .action((_, c) => c.copy(implicitPrefs = true)) @@ -160,6 +168,8 @@ object MovieLensALS { .setIterations(params.numIterations) .setLambda(params.lambda) .setImplicitPrefs(params.implicitPrefs) + .setUserBlocks(params.numUserBlocks) + .setProductBlocks(params.numProductBlocks) .run(training) val rmse = computeRmse(model, test, params.implicitPrefs) diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala index 5be33f1d5c428..ed35e34ad45ab 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala +++ 
b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala @@ -71,12 +71,12 @@ class SparkFlumeEvent() extends Externalizable { for (i <- 0 until numHeaders) { val keyLength = in.readInt() val keyBuff = new Array[Byte](keyLength) - in.read(keyBuff) + in.readFully(keyBuff) val key : String = Utils.deserialize(keyBuff) val valLength = in.readInt() val valBuff = new Array[Byte](valLength) - in.read(valBuff) + in.readFully(valBuff) val value : String = Utils.deserialize(valBuff) headers.put(key, value) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index d743bd7dd1825..cc56fd6ef28d6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -61,7 +61,7 @@ private[recommendation] case class InLinkBlock( * A more compact class to represent a rating than Tuple3[Int, Int, Double]. */ @Experimental -case class Rating(val user: Int, val product: Int, val rating: Double) +case class Rating(user: Int, product: Int, rating: Double) /** * Alternating Least Squares matrix factorization. @@ -93,7 +93,8 @@ case class Rating(val user: Int, val product: Int, val rating: Double) * preferences rather than explicit ratings given to items. */ class ALS private ( - private var numBlocks: Int, + private var numUserBlocks: Int, + private var numProductBlocks: Int, private var rank: Int, private var iterations: Int, private var lambda: Double, @@ -106,14 +107,31 @@ class ALS private ( * Constructs an ALS instance with default parameters: {numBlocks: -1, rank: 10, iterations: 10, * lambda: 0.01, implicitPrefs: false, alpha: 1.0}. */ - def this() = this(-1, 10, 10, 0.01, false, 1.0) + def this() = this(-1, -1, 10, 10, 0.01, false, 1.0) /** - * Set the number of blocks to parallelize the computation into; pass -1 for an auto-configured - * number of blocks. Default: -1. + * Set the number of blocks for both user blocks and product blocks to parallelize the computation + * into; pass -1 for an auto-configured number of blocks. Default: -1. */ def setBlocks(numBlocks: Int): ALS = { - this.numBlocks = numBlocks + this.numUserBlocks = numBlocks + this.numProductBlocks = numBlocks + this + } + + /** + * Set the number of user blocks to parallelize the computation. + */ + def setUserBlocks(numUserBlocks: Int): ALS = { + this.numUserBlocks = numUserBlocks + this + } + + /** + * Set the number of product blocks to parallelize the computation. 
+ */ + def setProductBlocks(numProductBlocks: Int): ALS = { + this.numProductBlocks = numProductBlocks this } @@ -176,31 +194,32 @@ class ALS private ( def run(ratings: RDD[Rating]): MatrixFactorizationModel = { val sc = ratings.context - val numBlocks = if (this.numBlocks == -1) { + val numUserBlocks = if (this.numUserBlocks == -1) { math.max(sc.defaultParallelism, ratings.partitions.size / 2) } else { - this.numBlocks + this.numUserBlocks } - - val partitioner = new Partitioner { - val numPartitions = numBlocks - - def getPartition(x: Any): Int = { - Utils.nonNegativeMod(byteswap32(x.asInstanceOf[Int]), numPartitions) - } + val numProductBlocks = if (this.numProductBlocks == -1) { + math.max(sc.defaultParallelism, ratings.partitions.size / 2) + } else { + this.numProductBlocks } - val ratingsByUserBlock = ratings.map{ rating => - (partitioner.getPartition(rating.user), rating) + val userPartitioner = new ALSPartitioner(numUserBlocks) + val productPartitioner = new ALSPartitioner(numProductBlocks) + + val ratingsByUserBlock = ratings.map { rating => + (userPartitioner.getPartition(rating.user), rating) } - val ratingsByProductBlock = ratings.map{ rating => - (partitioner.getPartition(rating.product), + val ratingsByProductBlock = ratings.map { rating => + (productPartitioner.getPartition(rating.product), Rating(rating.product, rating.user, rating.rating)) } - val (userInLinks, userOutLinks) = makeLinkRDDs(numBlocks, ratingsByUserBlock, partitioner) + val (userInLinks, userOutLinks) = + makeLinkRDDs(numUserBlocks, numProductBlocks, ratingsByUserBlock, productPartitioner) val (productInLinks, productOutLinks) = - makeLinkRDDs(numBlocks, ratingsByProductBlock, partitioner) + makeLinkRDDs(numProductBlocks, numUserBlocks, ratingsByProductBlock, userPartitioner) userInLinks.setName("userInLinks") userOutLinks.setName("userOutLinks") productInLinks.setName("productInLinks") @@ -232,27 +251,27 @@ class ALS private ( users.setName(s"users-$iter").persist() val YtY = Some(sc.broadcast(computeYtY(users))) val previousProducts = products - products = updateFeatures(users, userOutLinks, productInLinks, partitioner, rank, lambda, - alpha, YtY) + products = updateFeatures(numProductBlocks, users, userOutLinks, productInLinks, + userPartitioner, rank, lambda, alpha, YtY) previousProducts.unpersist() logInfo("Re-computing U given I (Iteration %d/%d)".format(iter, iterations)) products.setName(s"products-$iter").persist() val XtX = Some(sc.broadcast(computeYtY(products))) val previousUsers = users - users = updateFeatures(products, productOutLinks, userInLinks, partitioner, rank, lambda, - alpha, XtX) + users = updateFeatures(numUserBlocks, products, productOutLinks, userInLinks, + productPartitioner, rank, lambda, alpha, XtX) previousUsers.unpersist() } } else { for (iter <- 1 to iterations) { // perform ALS update logInfo("Re-computing I given U (Iteration %d/%d)".format(iter, iterations)) - products = updateFeatures(users, userOutLinks, productInLinks, partitioner, rank, lambda, - alpha, YtY = None) + products = updateFeatures(numProductBlocks, users, userOutLinks, productInLinks, + userPartitioner, rank, lambda, alpha, YtY = None) products.setName(s"products-$iter") logInfo("Re-computing U given I (Iteration %d/%d)".format(iter, iterations)) - users = updateFeatures(products, productOutLinks, userInLinks, partitioner, rank, lambda, - alpha, YtY = None) + users = updateFeatures(numUserBlocks, products, productOutLinks, userInLinks, + productPartitioner, rank, lambda, alpha, YtY = None) 
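As an editorial sketch of the API change in this hunk (not part of the patch; the block counts are arbitrary and mirror the new MovieLensALS options shown earlier), a caller can now size user and product blocks independently:

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.rdd.RDD

// -1 (the default) still lets ALS choose a block count automatically.
def trainModel(ratings: RDD[Rating]) =
  new ALS()
    .setUserBlocks(4)
    .setProductBlocks(2)
    .setRank(10)
    .setIterations(10)
    .setLambda(0.01)
    .run(ratings)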
users.setName(s"users-$iter") } } @@ -340,9 +359,10 @@ class ALS private ( /** * Flatten out blocked user or product factors into an RDD of (id, factor vector) pairs */ - private def unblockFactors(blockedFactors: RDD[(Int, Array[Array[Double]])], - outLinks: RDD[(Int, OutLinkBlock)]) = { - blockedFactors.join(outLinks).flatMap{ case (b, (factors, outLinkBlock)) => + private def unblockFactors( + blockedFactors: RDD[(Int, Array[Array[Double]])], + outLinks: RDD[(Int, OutLinkBlock)]): RDD[(Int, Array[Double])] = { + blockedFactors.join(outLinks).flatMap { case (b, (factors, outLinkBlock)) => for (i <- 0 until factors.length) yield (outLinkBlock.elementIds(i), factors(i)) } } @@ -351,14 +371,14 @@ class ALS private ( * Make the out-links table for a block of the users (or products) dataset given the list of * (user, product, rating) values for the users in that block (or the opposite for products). */ - private def makeOutLinkBlock(numBlocks: Int, ratings: Array[Rating], - partitioner: Partitioner): OutLinkBlock = { + private def makeOutLinkBlock(numProductBlocks: Int, ratings: Array[Rating], + productPartitioner: Partitioner): OutLinkBlock = { val userIds = ratings.map(_.user).distinct.sorted val numUsers = userIds.length val userIdToPos = userIds.zipWithIndex.toMap - val shouldSend = Array.fill(numUsers)(new BitSet(numBlocks)) + val shouldSend = Array.fill(numUsers)(new BitSet(numProductBlocks)) for (r <- ratings) { - shouldSend(userIdToPos(r.user))(partitioner.getPartition(r.product)) = true + shouldSend(userIdToPos(r.user))(productPartitioner.getPartition(r.product)) = true } OutLinkBlock(userIds, shouldSend) } @@ -367,18 +387,17 @@ class ALS private ( * Make the in-links table for a block of the users (or products) dataset given a list of * (user, product, rating) values for the users in that block (or the opposite for products). */ - private def makeInLinkBlock(numBlocks: Int, ratings: Array[Rating], - partitioner: Partitioner): InLinkBlock = { + private def makeInLinkBlock(numProductBlocks: Int, ratings: Array[Rating], + productPartitioner: Partitioner): InLinkBlock = { val userIds = ratings.map(_.user).distinct.sorted - val numUsers = userIds.length val userIdToPos = userIds.zipWithIndex.toMap // Split out our ratings by product block - val blockRatings = Array.fill(numBlocks)(new ArrayBuffer[Rating]) + val blockRatings = Array.fill(numProductBlocks)(new ArrayBuffer[Rating]) for (r <- ratings) { - blockRatings(partitioner.getPartition(r.product)) += r + blockRatings(productPartitioner.getPartition(r.product)) += r } - val ratingsForBlock = new Array[Array[(Array[Int], Array[Double])]](numBlocks) - for (productBlock <- 0 until numBlocks) { + val ratingsForBlock = new Array[Array[(Array[Int], Array[Double])]](numProductBlocks) + for (productBlock <- 0 until numProductBlocks) { // Create an array of (product, Seq(Rating)) ratings val groupedRatings = blockRatings(productBlock).groupBy(_.product).toArray // Sort them by product ID @@ -400,14 +419,16 @@ class ALS private ( * the users (or (blockId, (p, u, r)) for the products). We create these simultaneously to avoid * having to shuffle the (blockId, (u, p, r)) RDD twice, or to cache it. 
*/ - private def makeLinkRDDs(numBlocks: Int, ratings: RDD[(Int, Rating)], partitioner: Partitioner) - : (RDD[(Int, InLinkBlock)], RDD[(Int, OutLinkBlock)]) = - { - val grouped = ratings.partitionBy(new HashPartitioner(numBlocks)) + private def makeLinkRDDs( + numUserBlocks: Int, + numProductBlocks: Int, + ratingsByUserBlock: RDD[(Int, Rating)], + productPartitioner: Partitioner): (RDD[(Int, InLinkBlock)], RDD[(Int, OutLinkBlock)]) = { + val grouped = ratingsByUserBlock.partitionBy(new HashPartitioner(numUserBlocks)) val links = grouped.mapPartitionsWithIndex((blockId, elements) => { - val ratings = elements.map{_._2}.toArray - val inLinkBlock = makeInLinkBlock(numBlocks, ratings, partitioner) - val outLinkBlock = makeOutLinkBlock(numBlocks, ratings, partitioner) + val ratings = elements.map(_._2).toArray + val inLinkBlock = makeInLinkBlock(numProductBlocks, ratings, productPartitioner) + val outLinkBlock = makeOutLinkBlock(numProductBlocks, ratings, productPartitioner) Iterator.single((blockId, (inLinkBlock, outLinkBlock))) }, true) val inLinks = links.mapValues(_._1) @@ -439,26 +460,24 @@ class ALS private ( * It returns an RDD of new feature vectors for each user block. */ private def updateFeatures( + numUserBlocks: Int, products: RDD[(Int, Array[Array[Double]])], productOutLinks: RDD[(Int, OutLinkBlock)], userInLinks: RDD[(Int, InLinkBlock)], - partitioner: Partitioner, + productPartitioner: Partitioner, rank: Int, lambda: Double, alpha: Double, - YtY: Option[Broadcast[DoubleMatrix]]) - : RDD[(Int, Array[Array[Double]])] = - { - val numBlocks = products.partitions.size + YtY: Option[Broadcast[DoubleMatrix]]): RDD[(Int, Array[Array[Double]])] = { productOutLinks.join(products).flatMap { case (bid, (outLinkBlock, factors)) => - val toSend = Array.fill(numBlocks)(new ArrayBuffer[Array[Double]]) - for (p <- 0 until outLinkBlock.elementIds.length; userBlock <- 0 until numBlocks) { + val toSend = Array.fill(numUserBlocks)(new ArrayBuffer[Array[Double]]) + for (p <- 0 until outLinkBlock.elementIds.length; userBlock <- 0 until numUserBlocks) { if (outLinkBlock.shouldSend(p)(userBlock)) { toSend(userBlock) += factors(p) } } toSend.zipWithIndex.map{ case (buf, idx) => (idx, (bid, buf.toArray)) } - }.groupByKey(partitioner) + }.groupByKey(productPartitioner) .join(userInLinks) .mapValues{ case (messages, inLinkBlock) => updateBlock(messages, inLinkBlock, rank, lambda, alpha, YtY) @@ -475,7 +494,7 @@ class ALS private ( { // Sort the incoming block factor messages by block ID and make them an array val blockFactors = messages.toSeq.sortBy(_._1).map(_._2).toArray // Array[Array[Double]] - val numBlocks = blockFactors.length + val numProductBlocks = blockFactors.length val numUsers = inLinkBlock.elementIds.length // We'll sum up the XtXes using vectors that represent only the lower-triangular part, since @@ -488,9 +507,12 @@ class ALS private ( val tempXtX = DoubleMatrix.zeros(triangleSize) val fullXtX = DoubleMatrix.zeros(rank, rank) + // Count the number of ratings each user gives to provide user-specific regularization + val numRatings = Array.fill(numUsers)(0) + // Compute the XtX and Xy values for each user by adding products it rated in each product // block - for (productBlock <- 0 until numBlocks) { + for (productBlock <- 0 until numProductBlocks) { var p = 0 while (p < blockFactors(productBlock).length) { val x = wrapDoubleArray(blockFactors(productBlock)(p)) @@ -500,6 +522,7 @@ class ALS private ( if (implicitPrefs) { var i = 0 while (i < us.length) { + numRatings(us(i)) += 1 // Extension 
to the original paper to handle rs(i) < 0. confidence is a function // of |rs(i)| instead so that it is never negative: val confidence = 1 + alpha * abs(rs(i)) @@ -515,6 +538,7 @@ class ALS private ( } else { var i = 0 while (i < us.length) { + numRatings(us(i)) += 1 userXtX(us(i)).addi(tempXtX) SimpleBlas.axpy(rs(i), x, userXy(us(i))) i += 1 @@ -531,9 +555,10 @@ class ALS private ( // Compute the full XtX matrix from the lower-triangular part we got above fillFullMatrix(userXtX(index), fullXtX) // Add regularization + val regParam = numRatings(index) * lambda var i = 0 while (i < rank) { - fullXtX.data(i * rank + i) += lambda + fullXtX.data(i * rank + i) += regParam i += 1 } // Solve the resulting matrix, which is symmetric and positive-definite @@ -579,6 +604,23 @@ class ALS private ( } } +/** + * Partitioner for ALS. + */ +private[recommendation] class ALSPartitioner(override val numPartitions: Int) extends Partitioner { + override def getPartition(key: Any): Int = { + Utils.nonNegativeMod(byteswap32(key.asInstanceOf[Int]), numPartitions) + } + + override def equals(obj: Any): Boolean = { + obj match { + case p: ALSPartitioner => + this.numPartitions == p.numPartitions + case _ => + false + } + } +} /** * Top-level methods for calling Alternating Least Squares (ALS) matrix factorization. @@ -606,7 +648,7 @@ object ALS { blocks: Int, seed: Long ): MatrixFactorizationModel = { - new ALS(blocks, rank, iterations, lambda, false, 1.0, seed).run(ratings) + new ALS(blocks, blocks, rank, iterations, lambda, false, 1.0, seed).run(ratings) } /** @@ -629,7 +671,7 @@ object ALS { lambda: Double, blocks: Int ): MatrixFactorizationModel = { - new ALS(blocks, rank, iterations, lambda, false, 1.0).run(ratings) + new ALS(blocks, blocks, rank, iterations, lambda, false, 1.0).run(ratings) } /** @@ -689,7 +731,7 @@ object ALS { alpha: Double, seed: Long ): MatrixFactorizationModel = { - new ALS(blocks, rank, iterations, lambda, true, alpha, seed).run(ratings) + new ALS(blocks, blocks, rank, iterations, lambda, true, alpha, seed).run(ratings) } /** @@ -714,7 +756,7 @@ object ALS { blocks: Int, alpha: Double ): MatrixFactorizationModel = { - new ALS(blocks, rank, iterations, lambda, true, alpha).run(ratings) + new ALS(blocks, blocks, rank, iterations, lambda, true, alpha).run(ratings) } /** diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index 4d7b984e3ec29..44b757b6a1fb7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -21,7 +21,7 @@ import scala.util.Random import scala.collection.JavaConversions._ import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression._ @@ -56,7 +56,7 @@ object LogisticRegressionSuite { } } -class LogisticRegressionSuite extends FunSuite with LocalSparkContext with ShouldMatchers { +class LogisticRegressionSuite extends FunSuite with LocalSparkContext with Matchers { def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => prediction != expected.label diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala index 8a16284118cf7..951b4f7c6e6f4 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala @@ -21,7 +21,7 @@ import scala.util.Random import scala.collection.JavaConversions._ import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.LocalSparkContext @@ -61,7 +61,7 @@ object GradientDescentSuite { } } -class GradientDescentSuite extends FunSuite with LocalSparkContext with ShouldMatchers { +class GradientDescentSuite extends FunSuite with LocalSparkContext with Matchers { test("Assert the loss is decreasing.") { val nPoints = 10000 diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala index 820eca9b1bf65..4b1850659a18e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala @@ -18,13 +18,13 @@ package org.apache.spark.mllib.optimization import org.scalatest.FunSuite -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.LocalSparkContext -class LBFGSSuite extends FunSuite with LocalSparkContext with ShouldMatchers { +class LBFGSSuite extends FunSuite with LocalSparkContext with Matchers { val nPoints = 10000 val A = 2.0 diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala index 37c9b9d085841..81bebec8c7a39 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala @@ -121,6 +121,10 @@ class ALSSuite extends FunSuite with LocalSparkContext { testALS(100, 200, 2, 15, 0.7, 0.4, true, false, true) } + test("rank-2 matrices with different user and product blocks") { + testALS(100, 200, 2, 15, 0.7, 0.4, numUserBlocks = 4, numProductBlocks = 2) + } + test("pseudorandomness") { val ratings = sc.parallelize(ALSSuite.generateRatings(10, 20, 5, 0.5, false, false)._1, 2) val model11 = ALS.train(ratings, 5, 1, 1.0, 2, 1) @@ -153,35 +157,52 @@ class ALSSuite extends FunSuite with LocalSparkContext { } test("NNALS, rank 2") { - testALS(100, 200, 2, 15, 0.7, 0.4, false, false, false, -1, false) + testALS(100, 200, 2, 15, 0.7, 0.4, false, false, false, -1, -1, false) } /** * Test if we can correctly factorize R = U * P where U and P are of known rank. 
* - * @param users number of users - * @param products number of products - * @param features number of features (rank of problem) - * @param iterations number of iterations to run - * @param samplingRate what fraction of the user-product pairs are known + * @param users number of users + * @param products number of products + * @param features number of features (rank of problem) + * @param iterations number of iterations to run + * @param samplingRate what fraction of the user-product pairs are known * @param matchThreshold max difference allowed to consider a predicted rating correct - * @param implicitPrefs flag to test implicit feedback - * @param bulkPredict flag to test bulk prediciton + * @param implicitPrefs flag to test implicit feedback + * @param bulkPredict flag to test bulk prediction * @param negativeWeights whether the generated data can contain negative values - * @param numBlocks number of blocks to partition users and products into + * @param numUserBlocks number of user blocks to partition users into + * @param numProductBlocks number of product blocks to partition products into * @param negativeFactors whether the generated user/product factors can have negative entries */ - def testALS(users: Int, products: Int, features: Int, iterations: Int, - samplingRate: Double, matchThreshold: Double, implicitPrefs: Boolean = false, - bulkPredict: Boolean = false, negativeWeights: Boolean = false, numBlocks: Int = -1, - negativeFactors: Boolean = true) - { + def testALS( + users: Int, + products: Int, + features: Int, + iterations: Int, + samplingRate: Double, + matchThreshold: Double, + implicitPrefs: Boolean = false, + bulkPredict: Boolean = false, + negativeWeights: Boolean = false, + numUserBlocks: Int = -1, + numProductBlocks: Int = -1, + negativeFactors: Boolean = true) { val (sampledRatings, trueRatings, truePrefs) = ALSSuite.generateRatings(users, products, features, samplingRate, implicitPrefs, negativeWeights, negativeFactors) - val model = (new ALS().setBlocks(numBlocks).setRank(features).setIterations(iterations) - .setAlpha(1.0).setImplicitPrefs(implicitPrefs).setLambda(0.01).setSeed(0L) - .setNonnegative(!negativeFactors).run(sc.parallelize(sampledRatings))) + val model = new ALS() + .setUserBlocks(numUserBlocks) + .setProductBlocks(numProductBlocks) + .setRank(features) + .setIterations(iterations) + .setAlpha(1.0) + .setImplicitPrefs(implicitPrefs) + .setLambda(0.01) + .setSeed(0L) + .setNonnegative(!negativeFactors) + .run(sc.parallelize(sampledRatings)) val predictedU = new DoubleMatrix(users, features) for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) { @@ -208,8 +229,9 @@ class ALSSuite extends FunSuite with LocalSparkContext { val prediction = predictedRatings.get(u, p) val correct = trueRatings.get(u, p) if (math.abs(prediction - correct) > matchThreshold) { - fail("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: %s\nU: %s\n P: %s".format( - u, p, correct, prediction, trueRatings, predictedRatings, predictedU, predictedP)) + fail(("Model failed to predict (%d, %d): %f vs %f\ncorr: %s\npred: %s\nU: %s\n P: %s") + .format(u, p, correct, prediction, trueRatings, predictedRatings, predictedU, + predictedP)) } } } else { diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 1477809943573..bb2d73741c3bf 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -15,16 +15,26 @@ * limitations under the License.
*/ -import com.typesafe.tools.mima.core.{MissingTypesProblem, MissingClassProblem, ProblemFilters} +import com.typesafe.tools.mima.core._ +import com.typesafe.tools.mima.core.MissingClassProblem +import com.typesafe.tools.mima.core.MissingTypesProblem import com.typesafe.tools.mima.core.ProblemFilters._ import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings import sbt._ object MimaBuild { + + def excludeMember(fullName: String) = Seq( + ProblemFilters.exclude[MissingMethodProblem](fullName), + ProblemFilters.exclude[MissingFieldProblem](fullName), + ProblemFilters.exclude[IncompatibleResultTypeProblem](fullName), + ProblemFilters.exclude[IncompatibleMethTypeProblem](fullName), + ProblemFilters.exclude[IncompatibleFieldTypeProblem](fullName) + ) + // Exclude a single class and its corresponding object - def excludeClass(className: String) = { - Seq( + def excludeClass(className: String) = Seq( excludePackage(className), ProblemFilters.exclude[MissingClassProblem](className), ProblemFilters.exclude[MissingTypesProblem](className), @@ -32,7 +42,7 @@ object MimaBuild { ProblemFilters.exclude[MissingClassProblem](className + "$"), ProblemFilters.exclude[MissingTypesProblem](className + "$") ) - } + // Exclude a Spark class, that is in the package org.apache.spark def excludeSparkClass(className: String) = { excludeClass("org.apache.spark." + className) @@ -49,20 +59,25 @@ object MimaBuild { val defaultExcludes = Seq() // Read package-private excludes from file - val excludeFilePath = (base.getAbsolutePath + "/.generated-mima-excludes") - val excludeFile = file(excludeFilePath) + val classExcludeFilePath = file(base.getAbsolutePath + "/.generated-mima-class-excludes") + val memberExcludeFilePath = file(base.getAbsolutePath + "/.generated-mima-member-excludes") + val ignoredClasses: Seq[String] = - if (!excludeFile.exists()) { + if (!classExcludeFilePath.exists()) { Seq() } else { - IO.read(excludeFile).split("\n") + IO.read(classExcludeFilePath).split("\n") } + val ignoredMembers: Seq[String] = + if (!memberExcludeFilePath.exists()) { + Seq() + } else { + IO.read(memberExcludeFilePath).split("\n") + } - - val externalExcludeFileClasses = ignoredClasses.flatMap(excludeClass) - - defaultExcludes ++ externalExcludeFileClasses ++ MimaExcludes.excludes + defaultExcludes ++ ignoredClasses.flatMap(excludeClass) ++ + ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes } def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq( diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index dd7efceb23c96..ee629794f60ad 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -52,7 +52,18 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.api.java.JavaRDDLike.countApproxDistinct$default$1"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1") + "org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$" + + "createZero$1") + ) ++ + Seq( // Ignore some private methods in ALS. 
+ ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$^dateFeatures"), + ProblemFilters.exclude[MissingMethodProblem]( // The only public constructor is the one without arguments. + "org.apache.spark.mllib.recommendation.ALS.this"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$$default$7") ) ++ MimaBuild.excludeSparkClass("rdd.ZippedRDD") ++ MimaBuild.excludeSparkClass("rdd.ZippedPartition") ++ diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index ad2897d3fb46d..2d60a44f04f6f 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -59,6 +59,10 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) + /** Following project only exists to pull previous artifacts of Spark for generating + Mima ignores. For more information see: SPARK 2071 */ + lazy val oldDeps = Project("oldDeps", file("dev"), settings = oldDepsSettings) + def replDependencies = Seq[ProjectReference](core, graphx, bagel, mllib, sql) ++ maybeHiveRef lazy val repl = Project("repl", file("repl"), settings = replSettings) @@ -86,7 +90,16 @@ object SparkBuild extends Build { lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings) .dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*) - lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects") + lazy val assembleDepsTask = TaskKey[Unit]("assemble-deps") + lazy val assembleDeps = assembleDepsTask := { + println() + println("**** NOTE ****") + println("'sbt/sbt assemble-deps' is no longer supported.") + println("Instead create a normal assembly and:") + println(" export SPARK_PREPEND_CLASSES=1 (toggle on)") + println(" unset SPARK_PREPEND_CLASSES (toggle off)") + println() + } // A configuration to set an alternative publishLocalConfiguration lazy val MavenCompile = config("m2r") extend(Compile) @@ -370,6 +383,7 @@ object SparkBuild extends Build { "net.sf.py4j" % "py4j" % "0.8.1" ), libraryDependencies ++= maybeAvro, + assembleDeps, previousArtifact := sparkPreviousArtifact("spark-core") ) @@ -581,9 +595,7 @@ object SparkBuild extends Build { def assemblyProjSettings = sharedSettings ++ Seq( name := "spark-assembly", - assembleDeps in Compile <<= (packageProjects.map(packageBin in Compile in _) ++ Seq(packageDependency in Compile)).dependOn, - jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }, - jarName in packageDependency <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + "-deps.jar" } + jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" } ) ++ assemblySettings ++ extraAssemblySettings def extraAssemblySettings() = Seq( @@ -599,6 +611,17 @@ object SparkBuild extends Build { } ) + def oldDepsSettings() = Defaults.defaultSettings ++ Seq( + name := "old-deps", + scalaVersion := "2.10.4", + retrieveManaged := true, + retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", + libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq", + "spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter", + "spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx", + 
"spark-core").map(sparkPreviousArtifact(_).get intransitive()) + ) + def twitterSettings() = sharedSettings ++ Seq( name := "spark-streaming-twitter", previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"), diff --git a/python/pyspark/mllib/_common.py b/python/pyspark/mllib/_common.py index a411a5d5914e0..e609b60a0f968 100644 --- a/python/pyspark/mllib/_common.py +++ b/python/pyspark/mllib/_common.py @@ -454,7 +454,7 @@ def _squared_distance(v1, v2): v2 = _convert_vector(v2) if type(v1) == ndarray and type(v2) == ndarray: diff = v1 - v2 - return diff.dot(diff) + return numpy.dot(diff, diff) elif type(v1) == ndarray: return v2.squared_distance(v1) else: @@ -469,10 +469,12 @@ def _dot(vec, target): calling numpy.dot of the two vectors, but for SciPy ones, we have to transpose them because they're column vectors. """ - if type(vec) == ndarray or type(vec) == SparseVector: + if type(vec) == ndarray: + return numpy.dot(vec, target) + elif type(vec) == SparseVector: return vec.dot(target) elif type(vec) == list: - return _convert_vector(vec).dot(target) + return numpy.dot(_convert_vector(vec), target) else: return vec.transpose().dot(target)[0] diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index da364e19874b1..ddd22850a819c 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -736,7 +736,7 @@ def max(self): def min(self): """ - Find the maximum item in this RDD. + Find the minimum item in this RDD. >>> sc.parallelize([1.0, 5.0, 43.0, 10.0]).min() 1.0 @@ -1221,6 +1221,20 @@ def _mergeCombiners(iterator): combiners[k] = mergeCombiners(combiners[k], v) return combiners.iteritems() return shuffled.mapPartitions(_mergeCombiners) + + def aggregateByKey(self, zeroValue, seqFunc, combFunc, numPartitions=None): + """ + Aggregate the values of each key, using given combine functions and a neutral "zero value". + This function can return a different result type, U, than the type of the values in this RDD, + V. Thus, we need one operation for merging a V into a U and one operation for merging two U's, + The former operation is used for merging values within a partition, and the latter is used + for merging values between partitions. To avoid memory allocation, both of these functions are + allowed to modify and return their first argument instead of creating a new U. 
+ """ + def createZero(): + return copy.deepcopy(zeroValue) + + return self.combineByKey(lambda v: seqFunc(createZero(), v), seqFunc, combFunc, numPartitions) def foldByKey(self, zeroValue, func, numPartitions=None): """ @@ -1234,7 +1248,10 @@ def foldByKey(self, zeroValue, func, numPartitions=None): >>> rdd.foldByKey(0, add).collect() [('a', 2), ('b', 1)] """ - return self.combineByKey(lambda v: func(zeroValue, v), func, func, numPartitions) + def createZero(): + return copy.deepcopy(zeroValue) + + return self.combineByKey(lambda v: func(createZero(), v), func, func, numPartitions) # TODO: support variant with custom partitioner diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index b4e9618cc25b5..960d0a82448aa 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -117,7 +117,7 @@ def parquetFile(self, path): >>> srdd = sqlCtx.inferSchema(rdd) >>> srdd.saveAsParquetFile(parquetFile) >>> srdd2 = sqlCtx.parquetFile(parquetFile) - >>> srdd.collect() == srdd2.collect() + >>> sorted(srdd.collect()) == sorted(srdd2.collect()) True """ jschema_rdd = self._ssql_ctx.parquetFile(path) @@ -141,7 +141,7 @@ def table(self, tableName): >>> srdd = sqlCtx.inferSchema(rdd) >>> sqlCtx.registerRDDAsTable(srdd, "table1") >>> srdd2 = sqlCtx.table("table1") - >>> srdd.collect() == srdd2.collect() + >>> sorted(srdd.collect()) == sorted(srdd2.collect()) True """ return SchemaRDD(self._ssql_ctx.table(tableName), self) @@ -293,7 +293,7 @@ def saveAsParquetFile(self, path): >>> srdd = sqlCtx.inferSchema(rdd) >>> srdd.saveAsParquetFile(parquetFile) >>> srdd2 = sqlCtx.parquetFile(parquetFile) - >>> srdd2.collect() == srdd.collect() + >>> sorted(srdd2.collect()) == sorted(srdd.collect()) True """ self._jschema_rdd.saveAsParquetFile(path) @@ -307,7 +307,7 @@ def registerAsTable(self, name): >>> srdd = sqlCtx.inferSchema(rdd) >>> srdd.registerAsTable("test") >>> srdd2 = sqlCtx.sql("select * from test") - >>> srdd.collect() == srdd2.collect() + >>> sorted(srdd.collect()) == sorted(srdd2.collect()) True """ self._jschema_rdd.registerAsTable(name) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 184ee810b861b..c15bb457759ed 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -188,6 +188,21 @@ def test_deleting_input_files(self): os.unlink(tempFile.name) self.assertRaises(Exception, lambda: filtered_data.count()) + def testAggregateByKey(self): + data = self.sc.parallelize([(1, 1), (1, 1), (3, 2), (5, 1), (5, 3)], 2) + def seqOp(x, y): + x.add(y) + return x + + def combOp(x, y): + x |= y + return x + + sets = dict(data.aggregateByKey(set(), seqOp, combOp).collect()) + self.assertEqual(3, len(sets)) + self.assertEqual(set([1]), sets[1]) + self.assertEqual(set([2]), sets[3]) + self.assertEqual(set([1, 3]), sets[5]) class TestIO(PySparkTestCase): diff --git a/python/run-tests b/python/run-tests index 36a96121cbc0d..9282aa47e8375 100755 --- a/python/run-tests +++ b/python/run-tests @@ -32,7 +32,8 @@ rm -f unit-tests.log rm -rf metastore warehouse function run_test() { - SPARK_TESTING=0 $FWDIR/bin/pyspark $1 2>&1 | tee -a > unit-tests.log + echo "Running test: $1" + SPARK_TESTING=1 $FWDIR/bin/pyspark $1 2>&1 | tee -a > unit-tests.log FAILED=$((PIPESTATUS[0]||$FAILED)) # Fail and exit on the first test failure. @@ -43,18 +44,23 @@ function run_test() { echo -en "\033[0m" # No color exit -1 fi - } +echo "Running PySpark tests. Output is in python/unit-tests.log." 
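For reference (an editorial sketch, assuming an existing SparkContext named sc), the aggregateByKey semantics described in the docstring and exercised by testAggregateByKey above look like this on the Scala side:

// Collect the distinct values seen for each key: the zero value is an empty
// set, seqOp adds one element, and combOp unions two partial sets.
val data = sc.parallelize(Seq((1, 1), (1, 1), (3, 2), (5, 1), (5, 3)), 2)
val sets = data.aggregateByKey(Set.empty[Int])(_ + _, _ ++ _).collectAsMap()
// sets(1) == Set(1), sets(3) == Set(2), sets(5) == Set(1, 3)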
+ run_test "pyspark/rdd.py" run_test "pyspark/context.py" run_test "pyspark/conf.py" if [ -n "$_RUN_SQL_TESTS" ]; then run_test "pyspark/sql.py" fi -run_test "-m doctest pyspark/broadcast.py" -run_test "-m doctest pyspark/accumulators.py" -run_test "-m doctest pyspark/serializers.py" +# These tests are included in the module-level docs, and so must +# be handled on a higher level rather than within the python file. +export PYSPARK_DOC_TEST=1 +run_test "pyspark/broadcast.py" +run_test "pyspark/accumulators.py" +run_test "pyspark/serializers.py" +unset PYSPARK_DOC_TEST run_test "pyspark/tests.py" run_test "pyspark/mllib/_common.py" run_test "pyspark/mllib/classification.py" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 36758f3114e59..46fcfbb9e26ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -111,6 +111,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val APPROXIMATE = Keyword("APPROXIMATE") protected val AVG = Keyword("AVG") protected val BY = Keyword("BY") + protected val CACHE = Keyword("CACHE") protected val CAST = Keyword("CAST") protected val COUNT = Keyword("COUNT") protected val DESC = Keyword("DESC") @@ -149,7 +150,9 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val SEMI = Keyword("SEMI") protected val STRING = Keyword("STRING") protected val SUM = Keyword("SUM") + protected val TABLE = Keyword("TABLE") protected val TRUE = Keyword("TRUE") + protected val UNCACHE = Keyword("UNCACHE") protected val UNION = Keyword("UNION") protected val WHERE = Keyword("WHERE") @@ -189,7 +192,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { UNION ~ ALL ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Union(q1, q2) } | UNION ~ opt(DISTINCT) ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Distinct(Union(q1, q2)) } ) - | insert + | insert | cache ) protected lazy val select: Parser[LogicalPlan] = @@ -220,6 +223,11 @@ class SqlParser extends StandardTokenParsers with PackratParsers { InsertIntoTable(r, Map[String, Option[String]](), s, overwrite) } + protected lazy val cache: Parser[LogicalPlan] = + (CACHE ^^^ true | UNCACHE ^^^ false) ~ TABLE ~ ident ^^ { + case doCache ~ _ ~ tableName => CacheCommand(tableName, doCache) + } + protected lazy val projections: Parser[Seq[Expression]] = repsep(projection, ",") protected lazy val projection: Parser[Expression] = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 3cf163f9a9a75..d177339d40ae5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -175,6 +175,8 @@ package object dsl { def where(condition: Expression) = Filter(condition, logicalPlan) + def limit(limitExpr: Expression) = Limit(limitExpr, logicalPlan) + def join( otherPlan: LogicalPlan, joinType: JoinType = Inner, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 420303408451f..c074b7bb01e57 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -76,7 +76,8 @@ trait CaseConversionExpression { type EvaluatedType = Any def convert(v: String): String - + + override def foldable: Boolean = child.foldable def nullable: Boolean = child.nullable def dataType: DataType = StringType @@ -142,6 +143,8 @@ case class RLike(left: Expression, right: Expression) case class Upper(child: Expression) extends UnaryExpression with CaseConversionExpression { override def convert(v: String): String = v.toUpperCase() + + override def toString() = s"Upper($child)" } /** @@ -150,4 +153,6 @@ case class Upper(child: Expression) extends UnaryExpression with CaseConversionE case class Lower(child: Expression) extends UnaryExpression with CaseConversionExpression { override def convert(v: String): String = v.toLowerCase() + + override def toString() = s"Lower($child)" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 406ffd6801e98..25a347bec0e4c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -19,22 +19,29 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.FullOuter +import org.apache.spark.sql.catalyst.plans.LeftOuter +import org.apache.spark.sql.catalyst.plans.RightOuter +import org.apache.spark.sql.catalyst.plans.LeftSemi import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.types._ object Optimizer extends RuleExecutor[LogicalPlan] { val batches = + Batch("Combine Limits", FixedPoint(100), + CombineLimits) :: Batch("ConstantFolding", FixedPoint(100), NullPropagation, ConstantFolding, BooleanSimplification, SimplifyFilters, - SimplifyCasts) :: + SimplifyCasts, + SimplifyCaseConversionExpressions) :: Batch("Filter Pushdown", FixedPoint(100), CombineFilters, PushPredicateThroughProject, - PushPredicateThroughInnerJoin, + PushPredicateThroughJoin, ColumnPruning) :: Nil } @@ -100,8 +107,8 @@ object ColumnPruning extends Rule[LogicalPlan] { object NullPropagation extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { - case e @ Count(Literal(null, _)) => Literal(0, e.dataType) - case e @ Sum(Literal(c, _)) if c == 0 => Literal(0, e.dataType) + case e @ Count(Literal(null, _)) => Cast(Literal(0L), e.dataType) + case e @ Sum(Literal(c, _)) if c == 0 => Cast(Literal(0L), e.dataType) case e @ Average(Literal(c, _)) if c == 0 => Literal(0.0, e.dataType) case e @ IsNull(c) if c.nullable == false => Literal(false, BooleanType) case e @ IsNotNull(c) if c.nullable == false => Literal(true, BooleanType) @@ -126,18 +133,6 @@ object NullPropagation extends Rule[LogicalPlan] { case Literal(candidate, _) if candidate == v => true case _ => false })) => Literal(true, BooleanType) - case e: UnaryMinus => e.child match { - case Literal(null, _) => Literal(null, e.dataType) - case _ => e - } - case e: Cast => e.child match { - case Literal(null, _) => Literal(null, e.dataType) - case _ => e - } - case e: 
Not => e.child match { - case Literal(null, _) => Literal(null, e.dataType) - case _ => e - } // Put exceptional cases above if any case e: BinaryArithmetic => e.children match { case Literal(null, _) :: right :: Nil => Literal(null, e.dataType) @@ -254,28 +249,98 @@ object PushPredicateThroughProject extends Rule[LogicalPlan] { /** * Pushes down [[catalyst.plans.logical.Filter Filter]] operators where the `condition` can be - * evaluated using only the attributes of the left or right side of an inner join. Other + * evaluated using only the attributes of the left or right side of a join. Other * [[catalyst.plans.logical.Filter Filter]] conditions are moved into the `condition` of the * [[catalyst.plans.logical.Join Join]]. + * And also Pushes down the join filter, where the `condition` can be evaluated using only the + * attributes of the left or right side of sub query when applicable. + * + * Check https://cwiki.apache.org/confluence/display/Hive/OuterJoinBehavior for more details */ -object PushPredicateThroughInnerJoin extends Rule[LogicalPlan] with PredicateHelper { +object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { + // split the condition expression into 3 parts, + // (canEvaluateInLeftSide, canEvaluateInRightSide, haveToEvaluateWithBothSide) + private def split(condition: Seq[Expression], left: LogicalPlan, right: LogicalPlan) = { + val (leftEvaluateCondition, rest) = + condition.partition(_.references subsetOf left.outputSet) + val (rightEvaluateCondition, commonCondition) = + rest.partition(_.references subsetOf right.outputSet) + + (leftEvaluateCondition, rightEvaluateCondition, commonCondition) + } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case f @ Filter(filterCondition, Join(left, right, Inner, joinCondition)) => - val allConditions = - splitConjunctivePredicates(filterCondition) ++ - joinCondition.map(splitConjunctivePredicates).getOrElse(Nil) - - // Split the predicates into those that can be evaluated on the left, right, and those that - // must be evaluated after the join. - val (rightConditions, leftOrJoinConditions) = - allConditions.partition(_.references subsetOf right.outputSet) - val (leftConditions, joinConditions) = - leftOrJoinConditions.partition(_.references subsetOf left.outputSet) - - // Build the new left and right side, optionally with the pushed down filters. - val newLeft = leftConditions.reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) - val newRight = rightConditions.reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) - Join(newLeft, newRight, Inner, joinConditions.reduceLeftOption(And)) + // push the where condition down into join filter + case f @ Filter(filterCondition, Join(left, right, joinType, joinCondition)) => + val (leftFilterConditions, rightFilterConditions, commonFilterCondition) = + split(splitConjunctivePredicates(filterCondition), left, right) + + joinType match { + case Inner => + // push down the single side `where` condition into respective sides + val newLeft = leftFilterConditions. + reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) + val newRight = rightFilterConditions. + reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) + val newJoinCond = (commonFilterCondition ++ joinCondition).reduceLeftOption(And) + + Join(newLeft, newRight, Inner, newJoinCond) + case RightOuter => + // push down the right side only `where` condition + val newLeft = left + val newRight = rightFilterConditions. 
+ reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) + val newJoinCond = joinCondition + val newJoin = Join(newLeft, newRight, RightOuter, newJoinCond) + + (leftFilterConditions ++ commonFilterCondition). + reduceLeftOption(And).map(Filter(_, newJoin)).getOrElse(newJoin) + case _ @ (LeftOuter | LeftSemi) => + // push down the left side only `where` condition + val newLeft = leftFilterConditions. + reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) + val newRight = right + val newJoinCond = joinCondition + val newJoin = Join(newLeft, newRight, joinType, newJoinCond) + + (rightFilterConditions ++ commonFilterCondition). + reduceLeftOption(And).map(Filter(_, newJoin)).getOrElse(newJoin) + case FullOuter => f // DO Nothing for Full Outer Join + } + + // push down the join filter into sub query scanning if applicable + case f @ Join(left, right, joinType, joinCondition) => + val (leftJoinConditions, rightJoinConditions, commonJoinCondition) = + split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right) + + joinType match { + case Inner => + // push down the single side only join filter for both sides sub queries + val newLeft = leftJoinConditions. + reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) + val newRight = rightJoinConditions. + reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) + val newJoinCond = commonJoinCondition.reduceLeftOption(And) + + Join(newLeft, newRight, Inner, newJoinCond) + case RightOuter => + // push down the left side only join filter for left side sub query + val newLeft = leftJoinConditions. + reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) + val newRight = right + val newJoinCond = (rightJoinConditions ++ commonJoinCondition).reduceLeftOption(And) + + Join(newLeft, newRight, RightOuter, newJoinCond) + case _ @ (LeftOuter | LeftSemi) => + // push down the right side only join filter for right sub query + val newLeft = left + val newRight = rightJoinConditions. + reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) + val newJoinCond = (leftJoinConditions ++ commonJoinCondition).reduceLeftOption(And) + + Join(newLeft, newRight, joinType, newJoinCond) + case FullOuter => f + } } } @@ -288,3 +353,29 @@ object SimplifyCasts extends Rule[LogicalPlan] { case Cast(e, dataType) if e.dataType == dataType => e } } + +/** + * Combines two adjacent [[catalyst.plans.logical.Limit Limit]] operators into one, merging the + * expressions into one single expression. + */ +object CombineLimits extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case ll @ Limit(le, nl @ Limit(ne, grandChild)) => + Limit(If(LessThan(ne, le), ne, le), grandChild) + } +} + +/** + * Removes the inner [[catalyst.expressions.CaseConversionExpression]] that are unnecessary because + * the inner conversion is overwritten by the outer one. 
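A rough editorial illustration of the CombineLimits rule above and the case-conversion rule whose definition follows (not part of the patch; Literal("Spark") stands in for an arbitrary string-valued child expression):

import org.apache.spark.sql.catalyst.expressions.{Literal, Lower, Upper}

// Only the outermost conversion matters: when this expression appears in a
// plan handed to the Optimizer, it is rewritten to Upper(Literal("Spark")).
val expr = Upper(Lower(Literal("Spark")))

// CombineLimits behaves analogously on plans: Limit(5, Limit(10, child))
// collapses into a single Limit(If(LessThan(10, 5), 10, 5), child).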
+ */ +object SimplifyCaseConversionExpressions extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case q: LogicalPlan => q transformExpressionsUp { + case Upper(Upper(child)) => Upper(child) + case Upper(Lower(child)) => Upper(child) + case Lower(Upper(child)) => Lower(child) + case Lower(Lower(child)) => Lower(child) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 7eeb98aea6368..0933a31c362d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.types.{StringType, StructType} +import org.apache.spark.sql.catalyst.types.StructType import org.apache.spark.sql.catalyst.trees abstract class LogicalPlan extends QueryPlan[LogicalPlan] { @@ -96,39 +96,6 @@ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] { def references = Set.empty } -/** - * A logical node that represents a non-query command to be executed by the system. For example, - * commands can be used by parsers to represent DDL operations. - */ -abstract class Command extends LeafNode { - self: Product => - def output: Seq[Attribute] = Seq.empty // TODO: SPARK-2081 should fix this -} - -/** - * Returned for commands supported by a given parser, but not catalyst. In general these are DDL - * commands that are passed directly to another system. - */ -case class NativeCommand(cmd: String) extends Command - -/** - * Commands of the form "SET (key) (= value)". - */ -case class SetCommand(key: Option[String], value: Option[String]) extends Command { - override def output = Seq( - AttributeReference("key", StringType, nullable = false)(), - AttributeReference("value", StringType, nullable = false)() - ) -} - -/** - * Returned by a parser when the users only wants to see what query plan would be executed, without - * actually performing the execution. - */ -case class ExplainCommand(plan: LogicalPlan) extends Command { - override def output = Seq(AttributeReference("plan", StringType, nullable = false)()) -} - /** * A logical plan node with single child. 
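A quick hedged sanity check, in plain Scala over ASCII data, of the equivalence the SimplifyCaseConversionExpressions rule above relies on: only the outermost conversion determines the final casing.

// LOWER inside UPPER is redundant, and vice versa:
assert("sPark".toLowerCase.toUpperCase == "sPark".toUpperCase)  // both are "SPARK"
assert("sPark".toUpperCase.toLowerCase == "sPark".toLowerCase)  // both are "spark"
// Hence Upper(Lower(c)) rewrites to Upper(c), and Lower(Upper(c)) to Lower(c).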
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index d3347b622f3d8..b777cf4249196 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -135,9 +135,9 @@ case class Aggregate( def references = (groupingExpressions ++ aggregateExpressions).flatMap(_.references).toSet } -case class Limit(limit: Expression, child: LogicalPlan) extends UnaryNode { +case class Limit(limitExpr: Expression, child: LogicalPlan) extends UnaryNode { def output = child.output - def references = limit.references + def references = limitExpr.references } case class Subquery(alias: String, child: LogicalPlan) extends UnaryNode { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala new file mode 100644 index 0000000000000..d05c9652753e0 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Attribute} +import org.apache.spark.sql.catalyst.types.StringType + +/** + * A logical node that represents a non-query command to be executed by the system. For example, + * commands can be used by parsers to represent DDL operations. + */ +abstract class Command extends LeafNode { + self: Product => + def output: Seq[Attribute] = Seq.empty // TODO: SPARK-2081 should fix this +} + +/** + * Returned for commands supported by a given parser, but not catalyst. In general these are DDL + * commands that are passed directly to another system. + */ +case class NativeCommand(cmd: String) extends Command + +/** + * Commands of the form "SET (key) (= value)". + */ +case class SetCommand(key: Option[String], value: Option[String]) extends Command { + override def output = Seq( + AttributeReference("key", StringType, nullable = false)(), + AttributeReference("value", StringType, nullable = false)() + ) +} + +/** + * Returned by a parser when the users only wants to see what query plan would be executed, without + * actually performing the execution. + */ +case class ExplainCommand(plan: LogicalPlan) extends Command { + override def output = Seq(AttributeReference("plan", StringType, nullable = false)()) +} + +/** + * Returned for the "CACHE TABLE tableName" and "UNCACHE TABLE tableName" command. 
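A hedged sketch of how a planner or shell might dispatch on these command nodes once a parser has produced them; describeCommand is a hypothetical helper for illustration only, the real wiring lives in HiveQl and SparkStrategies further down (CacheCommand itself is declared immediately below):

import org.apache.spark.sql.catalyst.plans.logical._

def describeCommand(cmd: Command): String = cmd match {
  case NativeCommand(sql)         => s"pass through to the underlying system: $sql"
  case SetCommand(key, value)     => s"update configuration: key=$key value=$value"
  case ExplainCommand(child)      => s"print the plan for: $child"
  case CacheCommand(table, true)  => s"cache table $table in memory"
  case CacheCommand(table, false) => s"uncache table $table"
  case other                      => s"unhandled command: $other"
}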
+ */ +case class CacheCommand(tableName: String, doCache: Boolean) extends Command + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala new file mode 100644 index 0000000000000..714f01843c0f5 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.dsl.expressions._ + +class CombiningLimitsSuite extends OptimizerTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Combine Limit", FixedPoint(2), + CombineLimits) :: + Batch("Constant Folding", FixedPoint(3), + NullPropagation, + ConstantFolding, + BooleanSimplification) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + + test("limits: combines two limits") { + val originalQuery = + testRelation + .select('a) + .limit(10) + .limit(5) + + val optimized = Optimize(originalQuery.analyze) + val correctAnswer = + testRelation + .select('a) + .limit(5).analyze + + comparePlans(optimized, correctAnswer) + } + + test("limits: combines three limits") { + val originalQuery = + testRelation + .select('a) + .limit(2) + .limit(7) + .limit(5) + + val optimized = Optimize(originalQuery.analyze) + val correctAnswer = + testRelation + .select('a) + .limit(2).analyze + + comparePlans(optimized, correctAnswer) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 20dfba847790c..6efc0e211eb21 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.catalyst.types.{DoubleType, IntegerType} +import org.apache.spark.sql.catalyst.types._ // For implicit conversions import org.apache.spark.sql.catalyst.dsl.plans._ @@ -173,4 +173,63 @@ class ConstantFoldingSuite extends OptimizerTest { comparePlans(optimized, correctAnswer) } + + 
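// A hedged aside on CombiningLimitsSuite above: in the CombineLimits rule,
// If(LessThan(ne, le), ne, le) is a symbolic min of the inner and outer limit, and the
// suite's "Constant Folding" batch is there precisely to evaluate that expression, so
//   limit(10).limit(5)          collapses to limit(5), and
//   limit(2).limit(7).limit(5)  collapses to limit(min(min(2, 7), 5)) = limit(2),
// matching the two expected answers asserted in that suite.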
test("Constant folding test: expressions have null literals") { + val originalQuery = + testRelation + .select( + IsNull(Literal(null)) as 'c1, + IsNotNull(Literal(null)) as 'c2, + + GetItem(Literal(null, ArrayType(IntegerType)), 1) as 'c3, + GetItem(Literal(Seq(1), ArrayType(IntegerType)), Literal(null, IntegerType)) as 'c4, + GetField( + Literal(null, StructType(Seq(StructField("a", IntegerType, true)))), + "a") as 'c5, + + UnaryMinus(Literal(null, IntegerType)) as 'c6, + Cast(Literal(null), IntegerType) as 'c7, + Not(Literal(null, BooleanType)) as 'c8, + + Add(Literal(null, IntegerType), 1) as 'c9, + Add(1, Literal(null, IntegerType)) as 'c10, + + Equals(Literal(null, IntegerType), 1) as 'c11, + Equals(1, Literal(null, IntegerType)) as 'c12, + + Like(Literal(null, StringType), "abc") as 'c13, + Like("abc", Literal(null, StringType)) as 'c14, + + Upper(Literal(null, StringType)) as 'c15) + + val optimized = Optimize(originalQuery.analyze) + + val correctAnswer = + testRelation + .select( + Literal(true) as 'c1, + Literal(false) as 'c2, + + Literal(null, IntegerType) as 'c3, + Literal(null, IntegerType) as 'c4, + Literal(null, IntegerType) as 'c5, + + Literal(null, IntegerType) as 'c6, + Literal(null, IntegerType) as 'c7, + Literal(null, BooleanType) as 'c8, + + Literal(null, IntegerType) as 'c9, + Literal(null, IntegerType) as 'c10, + + Literal(null, BooleanType) as 'c11, + Literal(null, BooleanType) as 'c12, + + Literal(null, BooleanType) as 'c13, + Literal(null, BooleanType) as 'c14, + + Literal(null, StringType) as 'c15) + .analyze + + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index ef47850455a37..0cada785b6630 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -20,9 +20,9 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.LeftOuter +import org.apache.spark.sql.catalyst.plans.RightOuter import org.apache.spark.sql.catalyst.rules._ - -/* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -35,7 +35,7 @@ class FilterPushdownSuite extends OptimizerTest { Batch("Filter Pushdown", Once, CombineFilters, PushPredicateThroughProject, - PushPredicateThroughInnerJoin) :: Nil + PushPredicateThroughJoin) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int) @@ -161,6 +161,184 @@ class FilterPushdownSuite extends OptimizerTest { comparePlans(optimized, correctAnswer) } + + test("joins: push down left outer join #1") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, LeftOuter) + .where("x.b".attr === 1 && "y.b".attr === 2) + } + + val optimized = Optimize(originalQuery.analyze) + val left = testRelation.where('b === 1) + val correctAnswer = + left.join(y, LeftOuter).where("y.b".attr === 2).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down right outer join #1") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val 
originalQuery = { + x.join(y, RightOuter) + .where("x.b".attr === 1 && "y.b".attr === 2) + } + + val optimized = Optimize(originalQuery.analyze) + val right = testRelation.where('b === 2).subquery('d) + val correctAnswer = + x.join(right, RightOuter).where("x.b".attr === 1).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down left outer join #2") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, LeftOuter, Some("x.b".attr === 1)) + .where("x.b".attr === 2 && "y.b".attr === 2) + } + + val optimized = Optimize(originalQuery.analyze) + val left = testRelation.where('b === 2).subquery('d) + val correctAnswer = + left.join(y, LeftOuter, Some("d.b".attr === 1)).where("y.b".attr === 2).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down right outer join #2") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, RightOuter, Some("y.b".attr === 1)) + .where("x.b".attr === 2 && "y.b".attr === 2) + } + + val optimized = Optimize(originalQuery.analyze) + val right = testRelation.where('b === 2).subquery('d) + val correctAnswer = + x.join(right, RightOuter, Some("d.b".attr === 1)).where("x.b".attr === 2).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down left outer join #3") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, LeftOuter, Some("y.b".attr === 1)) + .where("x.b".attr === 2 && "y.b".attr === 2) + } + + val optimized = Optimize(originalQuery.analyze) + val left = testRelation.where('b === 2).subquery('l) + val right = testRelation.where('b === 1).subquery('r) + val correctAnswer = + left.join(right, LeftOuter).where("r.b".attr === 2).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down right outer join #3") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, RightOuter, Some("y.b".attr === 1)) + .where("x.b".attr === 2 && "y.b".attr === 2) + } + + val optimized = Optimize(originalQuery.analyze) + val right = testRelation.where('b === 2).subquery('r) + val correctAnswer = + x.join(right, RightOuter, Some("r.b".attr === 1)).where("x.b".attr === 2).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down left outer join #4") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, LeftOuter, Some("y.b".attr === 1)) + .where("x.b".attr === 2 && "y.b".attr === 2 && "x.c".attr === "y.c".attr) + } + + val optimized = Optimize(originalQuery.analyze) + val left = testRelation.where('b === 2).subquery('l) + val right = testRelation.where('b === 1).subquery('r) + val correctAnswer = + left.join(right, LeftOuter).where("r.b".attr === 2 && "l.c".attr === "r.c".attr).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down right outer join #4") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, RightOuter, Some("y.b".attr === 1)) + .where("x.b".attr === 2 && "y.b".attr === 2 && "x.c".attr === "y.c".attr) + } + + val optimized = Optimize(originalQuery.analyze) + val left = testRelation.subquery('l) + val right = testRelation.where('b === 2).subquery('r) + val correctAnswer = + left.join(right, RightOuter, Some("r.b".attr === 1)). 
+ where("l.b".attr === 2 && "l.c".attr === "r.c".attr).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down left outer join #5") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, LeftOuter, Some("y.b".attr === 1 && "x.a".attr === 3)) + .where("x.b".attr === 2 && "y.b".attr === 2 && "x.c".attr === "y.c".attr) + } + + val optimized = Optimize(originalQuery.analyze) + val left = testRelation.where('b === 2).subquery('l) + val right = testRelation.where('b === 1).subquery('r) + val correctAnswer = + left.join(right, LeftOuter, Some("l.a".attr===3)). + where("r.b".attr === 2 && "l.c".attr === "r.c".attr).analyze + + comparePlans(optimized, correctAnswer) + } + + test("joins: push down right outer join #5") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + + val originalQuery = { + x.join(y, RightOuter, Some("y.b".attr === 1 && "x.a".attr === 3)) + .where("x.b".attr === 2 && "y.b".attr === 2 && "x.c".attr === "y.c".attr) + } + + val optimized = Optimize(originalQuery.analyze) + val left = testRelation.where('a === 3).subquery('l) + val right = testRelation.where('b === 2).subquery('r) + val correctAnswer = + left.join(right, RightOuter, Some("r.b".attr === 1)). + where("l.b".attr === 2 && "l.c".attr === "r.c".attr).analyze + + comparePlans(optimized, correctAnswer) + } test("joins: can't push down") { val x = testRelation.subquery('x) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCaseConversionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCaseConversionExpressionsSuite.scala new file mode 100644 index 0000000000000..df1409fe7baee --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCaseConversionExpressionsSuite.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ + +/* Implicit conversions */ +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ + +class SimplifyCaseConversionExpressionsSuite extends OptimizerTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Simplify CaseConversionExpressions", Once, + SimplifyCaseConversionExpressions) :: Nil + } + + val testRelation = LocalRelation('a.string) + + test("simplify UPPER(UPPER(str))") { + val originalQuery = + testRelation + .select(Upper(Upper('a)) as 'u) + + val optimized = Optimize(originalQuery.analyze) + val correctAnswer = + testRelation + .select(Upper('a) as 'u) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify UPPER(LOWER(str))") { + val originalQuery = + testRelation + .select(Upper(Lower('a)) as 'u) + + val optimized = Optimize(originalQuery.analyze) + val correctAnswer = + testRelation + .select(Upper('a) as 'u) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify LOWER(UPPER(str))") { + val originalQuery = + testRelation + .select(Lower(Upper('a)) as 'l) + + val optimized = Optimize(originalQuery.analyze) + val correctAnswer = testRelation + .select(Lower('a) as 'l) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify LOWER(LOWER(str))") { + val originalQuery = + testRelation + .select(Lower(Lower('a)) as 'l) + + val optimized = Optimize(originalQuery.analyze) + val correctAnswer = testRelation + .select(Lower('a) as 'l) + .analyze + + comparePlans(optimized, correctAnswer) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 021e0e8245a0d..264192ed1aa26 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -188,6 +188,15 @@ class SQLContext(@transient val sparkContext: SparkContext) } } + /** Returns true if the table is currently cached in-memory. */ + def isCached(tableName: String): Boolean = { + val relation = catalog.lookupRelation(None, tableName) + EliminateAnalysisOperators(relation) match { + case SparkLogicalPlan(_: InMemoryColumnarTableScan) => true + case _ => false + } + } + protected[sql] class SparkPlanner extends SparkStrategies { val sparkContext = self.sparkContext diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala index 8855c4e876917..7ad8edf5a5a6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala @@ -178,14 +178,18 @@ class SchemaRDD( def orderBy(sortExprs: SortOrder*): SchemaRDD = new SchemaRDD(sqlContext, Sort(sortExprs, logicalPlan)) + @deprecated("use limit with integer argument", "1.1.0") + def limit(limitExpr: Expression): SchemaRDD = + new SchemaRDD(sqlContext, Limit(limitExpr, logicalPlan)) + /** - * Limits the results by the given expressions. + * Limits the results by the given integer. 
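A hedged usage sketch of the isCached helper added to SQLContext above and of the integer limit and take(num) changes in this hunk; sqlContext and the table name "people" are assumptions for illustration only:

// Assumes a SQLContext named sqlContext with a table "people" already registered.
sqlContext.cacheTable("people")
assert(sqlContext.isCached("people"))      // analyzed plan is an in-memory columnar scan
sqlContext.uncacheTable("people")
assert(!sqlContext.isCached("people"))

// limit now takes a plain Int (the Expression overload is deprecated), and the take(num)
// override just below is limit(num).collect(), so only num rows reach the driver:
val firstTen = sqlContext.sql("SELECT * FROM people").limit(10).collect()
val sameTen  = sqlContext.sql("SELECT * FROM people").take(10)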
* {{{ * schemaRDD.limit(10) * }}} */ - def limit(limitExpr: Expression): SchemaRDD = - new SchemaRDD(sqlContext, Limit(limitExpr, logicalPlan)) + def limit(limitNum: Int): SchemaRDD = + new SchemaRDD(sqlContext, Limit(Literal(limitNum), logicalPlan)) /** * Performs a grouping followed by an aggregation. @@ -374,6 +378,8 @@ class SchemaRDD( override def collect(): Array[Row] = queryExecution.executedPlan.executeCollect() + override def take(num: Int): Array[Row] = limit(num).collect() + // ======================================================================= // Base RDD functions that do NOT change schema // ======================================================================= diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 0455748d40eec..f2f95dfe27e69 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -239,10 +239,11 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case logical.SetCommand(key, value) => Seq(execution.SetCommandPhysical(key, value, plan.output)(context)) case logical.ExplainCommand(child) => - val qe = context.executePlan(child) - Seq(execution.ExplainCommandPhysical(qe.executedPlan, plan.output)(context)) + val executedPlan = context.executePlan(child).executedPlan + Seq(execution.ExplainCommandPhysical(executedPlan, plan.output)(context)) + case logical.CacheCommand(tableName, cache) => + Seq(execution.CacheCommandPhysical(tableName, cache)(context)) case _ => Nil } } - } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 9364506691f38..be26d19e66862 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -65,3 +65,26 @@ case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute]) override def otherCopyArgs = context :: Nil } + +/** + * :: DeveloperApi :: + */ +@DeveloperApi +case class CacheCommandPhysical(tableName: String, doCache: Boolean)(@transient context: SQLContext) + extends LeafNode { + + lazy val commandSideEffect = { + if (doCache) { + context.cacheTable(tableName) + } else { + context.uncacheTable(tableName) + } + } + + override def execute(): RDD[Row] = { + commandSideEffect + context.emptyResult + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala index 88ff3d49a79b3..8d7a5ba59f96a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala @@ -169,7 +169,7 @@ case class LeftSemiJoinHash( def execute() = { buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) => - val hashTable = new java.util.HashSet[Row]() + val hashSet = new java.util.HashSet[Row]() var currentRow: Row = null // Create a Hash set of buildKeys @@ -177,43 +177,17 @@ case class LeftSemiJoinHash( currentRow = buildIter.next() val rowKey = buildSideKeyGenerator(currentRow) if(!rowKey.anyNull) { - val keyExists = hashTable.contains(rowKey) + val keyExists = hashSet.contains(rowKey) if (!keyExists) { - 
hashTable.add(rowKey) + hashSet.add(rowKey) } } } - new Iterator[Row] { - private[this] var currentStreamedRow: Row = _ - private[this] var currentHashMatched: Boolean = false - - private[this] val joinKeys = streamSideKeyGenerator() - - override final def hasNext: Boolean = - streamIter.hasNext && fetchNext() - - override final def next() = { - currentStreamedRow - } - - /** - * Searches the streamed iterator for the next row that has at least one match in hashtable. - * - * @return true if the search is successful, and false the streamed iterator runs out of - * tuples. - */ - private final def fetchNext(): Boolean = { - currentHashMatched = false - while (!currentHashMatched && streamIter.hasNext) { - currentStreamedRow = streamIter.next() - if (!joinKeys(currentStreamedRow).anyNull) { - currentHashMatched = hashTable.contains(joinKeys.currentValue) - } - } - currentHashMatched - } - } + val joinKeys = streamSideKeyGenerator() + streamIter.filter(current => { + !joinKeys(current).anyNull && hashSet.contains(joinKeys.currentValue) + }) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 0331f90272a99..ebca3adc2ff01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -70,4 +70,20 @@ class CachedTableSuite extends QueryTest { TestSQLContext.sql("SELECT * FROM testData a JOIN testData b ON a.key = b.key") TestSQLContext.uncacheTable("testData") } + + test("'CACHE TABLE' and 'UNCACHE TABLE' SQL statement") { + TestSQLContext.sql("CACHE TABLE testData") + TestSQLContext.table("testData").queryExecution.executedPlan match { + case _: InMemoryColumnarTableScan => // Found evidence of caching + case _ => fail(s"Table 'testData' should be cached") + } + assert(TestSQLContext.isCached("testData"), "Table 'testData' should be cached") + + TestSQLContext.sql("UNCACHE TABLE testData") + TestSQLContext.table("testData").queryExecution.executedPlan match { + case _: InMemoryColumnarTableScan => fail(s"Table 'testData' should not be cached") + case _ => // Found evidence of uncaching + } + assert(!TestSQLContext.isCached("testData"), "Table 'testData' should not be cached") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala index 5eb73a4eff980..08293f7f0ca30 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala @@ -28,6 +28,7 @@ class SQLConfSuite extends QueryTest { val testVal = "test.val.0" test("programmatic ways of basic setting and getting") { + clear() assert(getOption(testKey).isEmpty) assert(getAll.toSet === Set()) @@ -48,6 +49,7 @@ class SQLConfSuite extends QueryTest { } test("parse SQL set commands") { + clear() sql(s"set $testKey=$testVal") assert(get(testKey, testVal + "_") == testVal) assert(TestSQLContext.get(testKey, testVal + "_") == testVal) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f2d850ad6aa56..c1fc99f077431 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -136,6 +136,12 @@ class SQLQuerySuite extends QueryTest { 2.0) } + test("average overflow") { + checkAnswer( + 
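// Worked arithmetic behind this "average overflow" test, for reference: the
// largeAndSmallInts rows (added to TestData.scala below) give, for b = 1, the values
// 2147483644, 2147483645 and 2147483646. Their sum, 6442450935, no longer fits in an Int
// (Int.MaxValue is 2147483647), so the average has to be computed in a wider type:
// 6442450935 / 3 = 2147483645.0. For b = 2 the values 1, 2 and 3 average to 2.0, which
// matches the expected Seq((2147483645.0, 1), (2.0, 2)).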
sql("SELECT AVG(a),b FROM largeAndSmallInts group by b"), + Seq((2147483645.0,1),(2.0,2))) + } + test("count") { checkAnswer( sql("SELECT COUNT(*) FROM testData2"), @@ -396,6 +402,7 @@ class SQLQuerySuite extends QueryTest { sql(s"SET $nonexistentKey"), Seq(Seq(s"$nonexistentKey is undefined")) ) + clear() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index 05de736bbce1b..330b20b315d63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -30,6 +30,17 @@ object TestData { (1 to 100).map(i => TestData(i, i.toString))) testData.registerAsTable("testData") + case class LargeAndSmallInts(a: Int, b: Int) + val largeAndSmallInts: SchemaRDD = + TestSQLContext.sparkContext.parallelize( + LargeAndSmallInts(2147483644, 1) :: + LargeAndSmallInts(1, 2) :: + LargeAndSmallInts(2147483645, 1) :: + LargeAndSmallInts(2, 2) :: + LargeAndSmallInts(2147483646, 1) :: + LargeAndSmallInts(3, 2) :: Nil) + largeAndSmallInts.registerAsTable("largeAndSmallInts") + case class TestData2(a: Int, b: Int) val testData2: SchemaRDD = TestSQLContext.sparkContext.parallelize( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 4e74d9bc909fa..b745d8ffd8f17 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -218,15 +218,19 @@ private[hive] object HiveQl { case Array(key, value) => // "set key=value" SetCommand(Some(key), Some(value)) } - } else if (sql.toLowerCase.startsWith("add jar")) { + } else if (sql.trim.toLowerCase.startsWith("cache table")) { + CacheCommand(sql.drop(12).trim, true) + } else if (sql.trim.toLowerCase.startsWith("uncache table")) { + CacheCommand(sql.drop(14).trim, false) + } else if (sql.trim.toLowerCase.startsWith("add jar")) { AddJar(sql.drop(8)) - } else if (sql.toLowerCase.startsWith("add file")) { + } else if (sql.trim.toLowerCase.startsWith("add file")) { AddFile(sql.drop(9)) - } else if (sql.startsWith("dfs")) { + } else if (sql.trim.startsWith("dfs")) { DfsCommand(sql) - } else if (sql.startsWith("source")) { + } else if (sql.trim.startsWith("source")) { SourceCommand(sql.split(" ").toSeq match { case Seq("source", filePath) => filePath }) - } else if (sql.startsWith("!")) { + } else if (sql.trim.startsWith("!")) { ShellCommand(sql.drop(1)) } else { val tree = getAst(sql) @@ -839,11 +843,11 @@ private[hive] object HiveQl { case Token("TOK_FUNCTIONDI", Token(SUM(), Nil) :: arg :: Nil) => SumDistinct(nodeToExpr(arg)) case Token("TOK_FUNCTION", Token(MAX(), Nil) :: arg :: Nil) => Max(nodeToExpr(arg)) case Token("TOK_FUNCTION", Token(MIN(), Nil) :: arg :: Nil) => Min(nodeToExpr(arg)) - + /* System functions about string operations */ case Token("TOK_FUNCTION", Token(UPPER(), Nil) :: arg :: Nil) => Upper(nodeToExpr(arg)) case Token("TOK_FUNCTION", Token(LOWER(), Nil) :: arg :: Nil) => Lower(nodeToExpr(arg)) - + /* Casts */ case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) => Cast(nodeToExpr(arg), StringType) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala index 041e813598d1b..9386008d02d51 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala @@ -32,7 +32,7 @@ import org.apache.hadoop.hive.serde2.avro.AvroSerDe import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, NativeCommand} +import org.apache.spark.sql.catalyst.plans.logical.{CacheCommand, LogicalPlan, NativeCommand} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.hive._ @@ -103,7 +103,7 @@ class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) { val inRepoTests = if (System.getProperty("user.dir").endsWith("sql" + File.separator + "hive")) { new File("src" + File.separator + "test" + File.separator + "resources" + File.separator) } else { - new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" + + new File("sql" + File.separator + "hive" + File.separator + "src" + File.separator + "test" + File.separator + "resources") } @@ -130,6 +130,7 @@ class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) { override lazy val analyzed = { val describedTables = logical match { case NativeCommand(describedTable(tbl)) => tbl :: Nil + case CacheCommand(tbl, _) => tbl :: Nil case _ => Nil } diff --git a/sql/hive/src/test/resources/golden/semijoin-0-1631b71327abf75b96116036b977b26c b/sql/hive/src/test/resources/golden/semijoin-0-1631b71327abf75b96116036b977b26c new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-1-e7c99e1f46d502edbb0925d75aab5f0c b/sql/hive/src/test/resources/golden/semijoin-1-e7c99e1f46d502edbb0925d75aab5f0c new file mode 100644 index 0000000000000..2ed47ab83dd02 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-1-e7c99e1f46d502edbb0925d75aab5f0c @@ -0,0 +1,11 @@ +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +10 val_10 diff --git a/sql/hive/src/test/resources/golden/semijoin-10-ffd4fb3a903a6725ccb97d5451a3fec6 b/sql/hive/src/test/resources/golden/semijoin-10-ffd4fb3a903a6725ccb97d5451a3fec6 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-11-246a40dcafe077f02397e30d227c330 b/sql/hive/src/test/resources/golden/semijoin-11-246a40dcafe077f02397e30d227c330 new file mode 100644 index 0000000000000..a24bd8c6379e3 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-11-246a40dcafe077f02397e30d227c330 @@ -0,0 +1,8 @@ +0 val_0 +0 val_0 +0 val_0 +4 val_2 +8 val_4 +10 val_5 +10 val_5 +10 val_5 diff --git a/sql/hive/src/test/resources/golden/semijoin-12-6d93a9d332ba490835b17f261a5467df b/sql/hive/src/test/resources/golden/semijoin-12-6d93a9d332ba490835b17f261a5467df new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-13-18282d38b6efc0017089ab89b661764f b/sql/hive/src/test/resources/golden/semijoin-13-18282d38b6efc0017089ab89b661764f new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-14-19cfcefb10e1972bec0ffd421cd79de7 b/sql/hive/src/test/resources/golden/semijoin-14-19cfcefb10e1972bec0ffd421cd79de7 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-15-1de6eb3f357bd1c4d02ab4d19d43589 b/sql/hive/src/test/resources/golden/semijoin-15-1de6eb3f357bd1c4d02ab4d19d43589 new file mode 100644 index 0000000000000..03c61a908b071 --- /dev/null +++ 
b/sql/hive/src/test/resources/golden/semijoin-15-1de6eb3f357bd1c4d02ab4d19d43589 @@ -0,0 +1,11 @@ +val_0 +val_0 +val_0 +val_10 +val_2 +val_4 +val_5 +val_5 +val_5 +val_8 +val_9 diff --git a/sql/hive/src/test/resources/golden/semijoin-16-d3a72a90515ac4a8d8e9ac923bcda3d b/sql/hive/src/test/resources/golden/semijoin-16-d3a72a90515ac4a8d8e9ac923bcda3d new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-17-f0f8720cfd11fd71af17b310dc2d1019 b/sql/hive/src/test/resources/golden/semijoin-17-f0f8720cfd11fd71af17b310dc2d1019 new file mode 100644 index 0000000000000..2dcdfd1217ced --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-17-f0f8720cfd11fd71af17b310dc2d1019 @@ -0,0 +1,3 @@ +0 val_0 +0 val_0 +0 val_0 diff --git a/sql/hive/src/test/resources/golden/semijoin-18-f7b2ce472443982e32d954cbb5c96765 b/sql/hive/src/test/resources/golden/semijoin-18-f7b2ce472443982e32d954cbb5c96765 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-19-b1f1c7f701abe81c01e72fb98f0bd13f b/sql/hive/src/test/resources/golden/semijoin-19-b1f1c7f701abe81c01e72fb98f0bd13f new file mode 100644 index 0000000000000..a3670515e8cc2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-19-b1f1c7f701abe81c01e72fb98f0bd13f @@ -0,0 +1,3 @@ +val_10 +val_8 +val_9 diff --git a/sql/hive/src/test/resources/golden/semijoin-2-deb9c3286ae8e851b1fdb270085b16bc b/sql/hive/src/test/resources/golden/semijoin-2-deb9c3286ae8e851b1fdb270085b16bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-20-b7a8ebaeb42b2eaba7d97cadc3fd96c1 b/sql/hive/src/test/resources/golden/semijoin-20-b7a8ebaeb42b2eaba7d97cadc3fd96c1 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-21-480418a0646cf7260b494b9eb4821bb6 b/sql/hive/src/test/resources/golden/semijoin-21-480418a0646cf7260b494b9eb4821bb6 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-22-b6aebd98f7636cda7b24e0bf84d7ba41 b/sql/hive/src/test/resources/golden/semijoin-22-b6aebd98f7636cda7b24e0bf84d7ba41 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-23-ed730ccdf552c07e7a82ba6b7fd3fbda b/sql/hive/src/test/resources/golden/semijoin-23-ed730ccdf552c07e7a82ba6b7fd3fbda new file mode 100644 index 0000000000000..72bc6a6a88f6e --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-23-ed730ccdf552c07e7a82ba6b7fd3fbda @@ -0,0 +1,5 @@ +4 val_2 +8 val_4 +10 val_5 +10 val_5 +10 val_5 diff --git a/sql/hive/src/test/resources/golden/semijoin-24-d16b37134de78980b2bf96029e8265c3 b/sql/hive/src/test/resources/golden/semijoin-24-d16b37134de78980b2bf96029e8265c3 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-25-be2bd011cc80b480b271a08dbf381e9b b/sql/hive/src/test/resources/golden/semijoin-25-be2bd011cc80b480b271a08dbf381e9b new file mode 100644 index 0000000000000..d89ea1757c712 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-25-be2bd011cc80b480b271a08dbf381e9b @@ -0,0 +1,19 @@ +0 +0 +0 +0 +0 +0 +2 +4 +4 +5 +5 +5 +8 +8 +9 +10 +10 +10 +10 diff --git a/sql/hive/src/test/resources/golden/semijoin-26-f1d3bab29f1ebafa148dbe3816e1da25 b/sql/hive/src/test/resources/golden/semijoin-26-f1d3bab29f1ebafa148dbe3816e1da25 new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-27-391c256254d171973f02e7f33672ce1d b/sql/hive/src/test/resources/golden/semijoin-27-391c256254d171973f02e7f33672ce1d new file mode 100644 index 0000000000000..dbbdae75a52a4 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-27-391c256254d171973f02e7f33672ce1d @@ -0,0 +1,4 @@ +0 val_0 +0 val_0 +0 val_0 +8 val_8 diff --git a/sql/hive/src/test/resources/golden/semijoin-28-b56400f6d9372f353cf7292a2182e963 b/sql/hive/src/test/resources/golden/semijoin-28-b56400f6d9372f353cf7292a2182e963 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-29-9efeef3d3c38e22a74d379978178c4f5 b/sql/hive/src/test/resources/golden/semijoin-29-9efeef3d3c38e22a74d379978178c4f5 new file mode 100644 index 0000000000000..07c61afb5124b --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-29-9efeef3d3c38e22a74d379978178c4f5 @@ -0,0 +1,14 @@ +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +4 val_4 4 val_2 +8 val_8 8 val_4 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 diff --git a/sql/hive/src/test/resources/golden/semijoin-3-b4d4317dd3a10e18502f20f5c5250389 b/sql/hive/src/test/resources/golden/semijoin-3-b4d4317dd3a10e18502f20f5c5250389 new file mode 100644 index 0000000000000..bf51e8f5d9eb5 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-3-b4d4317dd3a10e18502f20f5c5250389 @@ -0,0 +1,11 @@ +0 val_0 +0 val_0 +0 val_0 +4 val_2 +8 val_4 +10 val_5 +10 val_5 +10 val_5 +16 val_8 +18 val_9 +20 val_10 diff --git a/sql/hive/src/test/resources/golden/semijoin-30-dd901d00fce5898b03a57cbc3028a70a b/sql/hive/src/test/resources/golden/semijoin-30-dd901d00fce5898b03a57cbc3028a70a new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-31-e5dc4d8185e63e984aa4e3a2e08bd67 b/sql/hive/src/test/resources/golden/semijoin-31-e5dc4d8185e63e984aa4e3a2e08bd67 new file mode 100644 index 0000000000000..d6283e34d8ffc --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-31-e5dc4d8185e63e984aa4e3a2e08bd67 @@ -0,0 +1,14 @@ +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +10 val_10 diff --git a/sql/hive/src/test/resources/golden/semijoin-32-23017c7663f2710265a7e2a4a1606d39 b/sql/hive/src/test/resources/golden/semijoin-32-23017c7663f2710265a7e2a4a1606d39 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-33-ed499f94c6e6ac847ef5187b3b43bbc5 b/sql/hive/src/test/resources/golden/semijoin-33-ed499f94c6e6ac847ef5187b3b43bbc5 new file mode 100644 index 0000000000000..080180f9d0f0e --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-33-ed499f94c6e6ac847ef5187b3b43bbc5 @@ -0,0 +1,14 @@ +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 diff --git a/sql/hive/src/test/resources/golden/semijoin-34-5e1b832090ab73c141c1167d5b25a490 b/sql/hive/src/test/resources/golden/semijoin-34-5e1b832090ab73c141c1167d5b25a490 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-35-8d5731f26232f6e26dd8012461b08d99 b/sql/hive/src/test/resources/golden/semijoin-35-8d5731f26232f6e26dd8012461b08d99 new file mode 100644 index 0000000000000..4a64d5c625790 --- /dev/null +++ 
b/sql/hive/src/test/resources/golden/semijoin-35-8d5731f26232f6e26dd8012461b08d99 @@ -0,0 +1,26 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 diff --git a/sql/hive/src/test/resources/golden/semijoin-36-b1159823dca8025926407f8aa921238d b/sql/hive/src/test/resources/golden/semijoin-36-b1159823dca8025926407f8aa921238d new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-37-a15b9074f999ce836be5329591b968d0 b/sql/hive/src/test/resources/golden/semijoin-37-a15b9074f999ce836be5329591b968d0 new file mode 100644 index 0000000000000..1420c786fb228 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-37-a15b9074f999ce836be5329591b968d0 @@ -0,0 +1,29 @@ +NULL +NULL +NULL +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 diff --git a/sql/hive/src/test/resources/golden/semijoin-38-f37547c73a48ce3ba089531b176e6ba b/sql/hive/src/test/resources/golden/semijoin-38-f37547c73a48ce3ba089531b176e6ba new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-39-c22a6f11368affcb80a9c80e653a47a8 b/sql/hive/src/test/resources/golden/semijoin-39-c22a6f11368affcb80a9c80e653a47a8 new file mode 100644 index 0000000000000..1420c786fb228 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-39-c22a6f11368affcb80a9c80e653a47a8 @@ -0,0 +1,29 @@ +NULL +NULL +NULL +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 diff --git a/sql/hive/src/test/resources/golden/semijoin-4-dfdad5a2742f93e8ea888191460809c0 b/sql/hive/src/test/resources/golden/semijoin-4-dfdad5a2742f93e8ea888191460809c0 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-40-32071a51e2ba6e86b1c5e40de55aae63 b/sql/hive/src/test/resources/golden/semijoin-40-32071a51e2ba6e86b1c5e40de55aae63 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-41-cf74f73a33b1af8902b7970a9350b092 b/sql/hive/src/test/resources/golden/semijoin-41-cf74f73a33b1af8902b7970a9350b092 new file mode 100644 index 0000000000000..aef9483bb0bc9 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-41-cf74f73a33b1af8902b7970a9350b092 @@ -0,0 +1,29 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 +16 +18 +20 diff --git a/sql/hive/src/test/resources/golden/semijoin-42-6b4257a74fca627785c967c99547f4c0 b/sql/hive/src/test/resources/golden/semijoin-42-6b4257a74fca627785c967c99547f4c0 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-43-e8a166ac2e94bf8d1da0fe91b0db2c81 b/sql/hive/src/test/resources/golden/semijoin-43-e8a166ac2e94bf8d1da0fe91b0db2c81 new file mode 100644 index 0000000000000..0bc413ef2e09e --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-43-e8a166ac2e94bf8d1da0fe91b0db2c81 @@ -0,0 +1,31 @@ +NULL +NULL +NULL +NULL +NULL +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 diff --git a/sql/hive/src/test/resources/golden/semijoin-44-945aaa3a24359ef73acab1e99500d5ea b/sql/hive/src/test/resources/golden/semijoin-44-945aaa3a24359ef73acab1e99500d5ea new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-45-3fd94ffd4f1eb6cf83dcc064599bf12b b/sql/hive/src/test/resources/golden/semijoin-45-3fd94ffd4f1eb6cf83dcc064599bf12b new file mode 
100644 index 0000000000000..3131e64446f66 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-45-3fd94ffd4f1eb6cf83dcc064599bf12b @@ -0,0 +1,42 @@ +NULL +NULL +NULL +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +2 +4 +4 +5 +5 +5 +8 +8 +9 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 diff --git a/sql/hive/src/test/resources/golden/semijoin-46-620e01f81f6e5254b4bbe8fab4043ec0 b/sql/hive/src/test/resources/golden/semijoin-46-620e01f81f6e5254b4bbe8fab4043ec0 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-47-f0140e4ee92508ba241f91c157b7af9c b/sql/hive/src/test/resources/golden/semijoin-47-f0140e4ee92508ba241f91c157b7af9c new file mode 100644 index 0000000000000..ff30bedb81861 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-47-f0140e4ee92508ba241f91c157b7af9c @@ -0,0 +1,35 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +16 +18 +20 diff --git a/sql/hive/src/test/resources/golden/semijoin-48-8a04442e84f99a584c2882d0af8c25d8 b/sql/hive/src/test/resources/golden/semijoin-48-8a04442e84f99a584c2882d0af8c25d8 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-49-df1d6705d3624be72036318a6b42f04c b/sql/hive/src/test/resources/golden/semijoin-49-df1d6705d3624be72036318a6b42f04c new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-5-d3c2f84a12374b307c58a69aba4ec70d b/sql/hive/src/test/resources/golden/semijoin-5-d3c2f84a12374b307c58a69aba4ec70d new file mode 100644 index 0000000000000..60f6eacee9b14 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-5-d3c2f84a12374b307c58a69aba4ec70d @@ -0,0 +1,22 @@ +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_4 +8 val_8 +9 val_9 +10 val_10 +10 val_5 +10 val_5 +10 val_5 +16 val_8 +18 val_9 +20 val_10 diff --git a/sql/hive/src/test/resources/golden/semijoin-6-90bb51b1330230d10a14fb7517457aa0 b/sql/hive/src/test/resources/golden/semijoin-6-90bb51b1330230d10a14fb7517457aa0 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-7-333d72e8bce6d11a35fc7a30418f225b b/sql/hive/src/test/resources/golden/semijoin-7-333d72e8bce6d11a35fc7a30418f225b new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-8-d46607be851a6f4e27e98cbbefdee994 b/sql/hive/src/test/resources/golden/semijoin-8-d46607be851a6f4e27e98cbbefdee994 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semijoin-9-f7adaf0f77ce6ff8c3a4807f428d8de2 b/sql/hive/src/test/resources/golden/semijoin-9-f7adaf0f77ce6ff8c3a4807f428d8de2 new file mode 100644 index 0000000000000..5baaac9bebf6d --- /dev/null +++ b/sql/hive/src/test/resources/golden/semijoin-9-f7adaf0f77ce6ff8c3a4807f428d8de2 @@ -0,0 +1,6 @@ +0 val_0 +0 val_0 +0 val_0 +4 val_4 +8 val_8 +10 val_10 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index f9a162ef4e3c0..91ac03ca30cd7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -56,4 +56,20 @@ class CachedTableSuite extends HiveComparisonTest { 
TestHive.uncacheTable("src") } } + + test("'CACHE TABLE' and 'UNCACHE TABLE' HiveQL statement") { + TestHive.hql("CACHE TABLE src") + TestHive.table("src").queryExecution.executedPlan match { + case _: InMemoryColumnarTableScan => // Found evidence of caching + case _ => fail(s"Table 'src' should be cached") + } + assert(TestHive.isCached("src"), "Table 'src' should be cached") + + TestHive.hql("UNCACHE TABLE src") + TestHive.table("src").queryExecution.executedPlan match { + case _: InMemoryColumnarTableScan => fail(s"Table 'src' should not be cached") + case _ => // Found evidence of uncaching + } + assert(!TestHive.isCached("src"), "Table 'src' should not be cached") + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index fb8f272d5abfe..3581617c269a6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -597,6 +597,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "select_unquote_and", "select_unquote_not", "select_unquote_or", + "semijoin", "serde_regex", "serde_reported_schema", "set_variable_sub", diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala b/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala index 86753360a07e4..a0aeacbc733bd 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala @@ -27,6 +27,7 @@ private[streaming] class ContextWaiter { } def notifyStop() = synchronized { + stopped = true notifyAll() } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/NetworkReceiverSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/NetworkReceiverSuite.scala index 303d149d285e1..d9ac3c91f6e36 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/NetworkReceiverSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/NetworkReceiverSuite.scala @@ -29,7 +29,6 @@ import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ -import scala.language.postfixOps /** Testsuite for testing the network receiver behavior */ class NetworkReceiverSuite extends FunSuite with Timeouts { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index cd86019f63e7e..7b33d3b235466 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -223,6 +223,18 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w } } + test("awaitTermination after stop") { + ssc = new StreamingContext(master, appName, batchDuration) + val inputStream = addInputStream(ssc) + inputStream.map(x => x).register() + + failAfter(10000 millis) { + ssc.start() + ssc.stop() + ssc.awaitTermination() + } + } + test("awaitTermination with error in task") { ssc = new StreamingContext(master, appName, batchDuration) val inputStream = addInputStream(ssc) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala 
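A hedged sketch of the race the ContextWaiter change above closes, and of what the new "awaitTermination after stop" test exercises: if notifyStop only signalled waiting threads without recording the stop, a caller that invokes awaitTermination after stop has already finished would block forever. Minimal standalone illustration, not the actual ContextWaiter (which also tracks errors):

class Waiter {
  private var stopped = false
  def notifyStop(): Unit = synchronized {
    stopped = true            // record the stop, do not just wake current waiters
    notifyAll()
  }
  def waitForStop(): Unit = synchronized {
    while (!stopped) wait()   // returns immediately if stop already happened
  }
}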
b/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala index ef0efa552ceaf..2861f5335ae36 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala @@ -27,12 +27,12 @@ import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.streaming.scheduler._ -import org.scalatest.matchers.ShouldMatchers +import org.scalatest.Matchers import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ import org.apache.spark.Logging -class StreamingListenerSuite extends TestSuiteBase with ShouldMatchers { +class StreamingListenerSuite extends TestSuiteBase with Matchers { val input = (1 to 4).map(Seq(_)).toSeq val operation = (d: DStream[Int]) => d.map(x => x) diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index 6a261e19a35cd..03a73f92b275e 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -40,74 +40,78 @@ object GenerateMIMAIgnore { private val classLoader = Thread.currentThread().getContextClassLoader private val mirror = runtimeMirror(classLoader) - private def classesPrivateWithin(packageName: String): Set[String] = { + + private def isDeveloperApi(sym: unv.Symbol) = + sym.annotations.exists(_.tpe =:= unv.typeOf[org.apache.spark.annotation.DeveloperApi]) + + private def isExperimental(sym: unv.Symbol) = + sym.annotations.exists(_.tpe =:= unv.typeOf[org.apache.spark.annotation.Experimental]) + + + private def isPackagePrivate(sym: unv.Symbol) = + !sym.privateWithin.fullName.startsWith("") + + private def isPackagePrivateModule(moduleSymbol: unv.ModuleSymbol) = + !moduleSymbol.privateWithin.fullName.startsWith("") + + /** + * For every class checks via scala reflection if the class itself or contained members + * have DeveloperApi or Experimental annotations or they are package private. + * Returns the tuple of such classes and members. + */ + private def privateWithin(packageName: String): (Set[String], Set[String]) = { val classes = getClasses(packageName) val ignoredClasses = mutable.HashSet[String]() + val ignoredMembers = mutable.HashSet[String]() - def isPackagePrivate(className: String) = { + for (className <- classes) { try { - /* Couldn't figure out if it's possible to determine a-priori whether a given symbol - is a module or class. */ - - val privateAsClass = mirror - .classSymbol(Class.forName(className, false, classLoader)) - .privateWithin - .fullName - .startsWith(packageName) - - val privateAsModule = mirror - .staticModule(className) - .privateWithin - .fullName - .startsWith(packageName) - - privateAsClass || privateAsModule - } catch { - case _: Throwable => { - println("Error determining visibility: " + className) - false + val classSymbol = mirror.classSymbol(Class.forName(className, false, classLoader)) + val moduleSymbol = mirror.staticModule(className) // TODO: see if it is necessary. + val directlyPrivateSpark = + isPackagePrivate(classSymbol) || isPackagePrivateModule(moduleSymbol) + val developerApi = isDeveloperApi(classSymbol) + val experimental = isExperimental(classSymbol) + + /* Inner classes defined within a private[spark] class or object are effectively + invisible, so we account for them as package private. 
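// A hedged note on the reflection helpers above: isDeveloperApi and isExperimental compare
// each annotation's type to the Spark annotation classes, so for a class Foo marked
// @DeveloperApi something like
//   mirror.classSymbol(Class.forName("Foo", false, classLoader))
//     .annotations.exists(_.tpe =:= unv.typeOf[org.apache.spark.annotation.DeveloperApi])
// evaluates to true (Scala 2.10 reflection, illustrative only). isPackagePrivate keys off
// privateWithin, which is NoSymbol (fullName "<none>") for ordinary public symbols, so only
// symbols qualified with private[somePackage] count as package private. Class.forName is
// called with initialize = false so that scanning does not run static initializers.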
*/ + lazy val indirectlyPrivateSpark = { + val maybeOuter = className.toString.takeWhile(_ != '$') + if (maybeOuter != className) { + isPackagePrivate(mirror.classSymbol(Class.forName(maybeOuter, false, classLoader))) || + isPackagePrivateModule(mirror.staticModule(maybeOuter)) + } else { + false + } + } + if (directlyPrivateSpark || indirectlyPrivateSpark || developerApi || experimental) { + ignoredClasses += className + } else { + // check if this class has package-private/annotated members. + ignoredMembers ++= getAnnotatedOrPackagePrivateMembers(classSymbol) } - } - } - def isDeveloperApi(className: String) = { - try { - val clazz = mirror.classSymbol(Class.forName(className, false, classLoader)) - clazz.annotations.exists(_.tpe =:= unv.typeOf[org.apache.spark.annotation.DeveloperApi]) } catch { - case _: Throwable => { - println("Error determining Annotations: " + className) - false - } + case _: Throwable => println("Error instrumenting class:" + className) } } + (ignoredClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet, ignoredMembers.toSet) + } - for (className <- classes) { - val directlyPrivateSpark = isPackagePrivate(className) - val developerApi = isDeveloperApi(className) - - /* Inner classes defined within a private[spark] class or object are effectively - invisible, so we account for them as package private. */ - val indirectlyPrivateSpark = { - val maybeOuter = className.toString.takeWhile(_ != '$') - if (maybeOuter != className) { - isPackagePrivate(maybeOuter) - } else { - false - } - } - if (directlyPrivateSpark || indirectlyPrivateSpark || developerApi) { - ignoredClasses += className - } - } - ignoredClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet + private def getAnnotatedOrPackagePrivateMembers(classSymbol: unv.ClassSymbol) = { + classSymbol.typeSignature.members + .filter(x => isPackagePrivate(x) || isDeveloperApi(x) || isExperimental(x)).map(_.fullName) } def main(args: Array[String]) { - scala.tools.nsc.io.File(".generated-mima-excludes"). - writeAll(classesPrivateWithin("org.apache.spark").mkString("\n")) - println("Created : .generated-mima-excludes in current directory.") + val (privateClasses, privateMembers) = privateWithin("org.apache.spark") + scala.tools.nsc.io.File(".generated-mima-class-excludes"). + writeAll(privateClasses.mkString("\n")) + println("Created : .generated-mima-class-excludes in current directory.") + scala.tools.nsc.io.File(".generated-mima-member-excludes"). + writeAll(privateMembers.mkString("\n")) + println("Created : .generated-mima-member-excludes in current directory.") } @@ -140,10 +144,17 @@ object GenerateMIMAIgnore { * Get all classes in a package from a jar file. 
*/ private def getClassesFromJar(jarPath: String, packageName: String) = { + import scala.collection.mutable val jar = new JarFile(new File(jarPath)) val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) - val classes = for (entry <- enums if entry.endsWith(".class")) - yield Class.forName(entry.replace('/', '.').stripSuffix(".class"), false, classLoader) + val classes = mutable.HashSet[Class[_]]() + for (entry <- enums if entry.endsWith(".class")) { + try { + classes += Class.forName(entry.replace('/', '.').stripSuffix(".class"), false, classLoader) + } catch { + case _: Throwable => println("Unable to load:" + entry) + } + } classes } } diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 8f0ecb855718e..1cc9c33cd2d02 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -277,7 +277,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, yarnAllocator.allocateContainers( math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 0)) ApplicationMaster.incrementAllocatorLoop(1) - Thread.sleep(100) + Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL) } } finally { // In case of exceptions, etc - ensure that count is at least ALLOCATOR_LOOP_WAIT_COUNT, @@ -416,6 +416,7 @@ object ApplicationMaster { // TODO: Currently, task to container is computed once (TaskSetManager) - which need not be // optimal as more containers are available. Might need to handle this better. private val ALLOCATOR_LOOP_WAIT_COUNT = 30 + private val ALLOCATE_HEARTBEAT_INTERVAL = 100 def incrementAllocatorLoop(by: Int) { val count = yarnAllocatorLoop.getAndAdd(by) @@ -467,13 +468,22 @@ object ApplicationMaster { }) } - // Wait for initialization to complete and atleast 'some' nodes can get allocated. + modified + } + + + /** + * Returns when we've either + * 1) received all the requested executors, + * 2) waited ALLOCATOR_LOOP_WAIT_COUNT * ALLOCATE_HEARTBEAT_INTERVAL ms, + * 3) hit an error that causes us to terminate trying to get containers. 
+ */ + def waitForInitialAllocations() { yarnAllocatorLoop.synchronized { while (yarnAllocatorLoop.get() <= ALLOCATOR_LOOP_WAIT_COUNT) { yarnAllocatorLoop.wait(1000L) } } - modified } def main(argStrings: Array[String]) { diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala index a3bd91590fc25..b6ecae1e652fe 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala @@ -271,6 +271,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp .asInstanceOf[FinishApplicationMasterRequest] finishReq.setAppAttemptId(appAttemptId) finishReq.setFinishApplicationStatus(status) + finishReq.setTrackingUrl(sparkConf.get("spark.yarn.historyServer.address", "")) resourceManager.finishApplicationMaster(finishReq) } diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 801e8b381588f..29a35680c0e72 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -19,7 +19,6 @@ package org.apache.spark.deploy.yarn import java.io.File import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException} -import java.nio.ByteBuffer import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} @@ -37,7 +36,7 @@ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.hadoop.yarn.api.protocolrecords._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration -import org.apache.hadoop.yarn.util.{Apps, Records} +import org.apache.hadoop.yarn.util.Records import org.apache.spark.{Logging, SparkConf, SparkContext} /** @@ -169,14 +168,13 @@ trait ClientBase extends Logging { destPath } - def qualifyForLocal(localURI: URI): Path = { + private def qualifyForLocal(localURI: URI): Path = { var qualifiedURI = localURI - // If not specified assume these are in the local filesystem to keep behavior like Hadoop + // If not specified, assume these are in the local filesystem to keep behavior like Hadoop if (qualifiedURI.getScheme() == null) { qualifiedURI = new URI(FileSystem.getLocal(conf).makeQualified(new Path(qualifiedURI)).toString) } - val qualPath = new Path(qualifiedURI) - qualPath + new Path(qualifiedURI) } def prepareLocalResources(appStagingDir: String): HashMap[String, LocalResource] = { @@ -305,13 +303,13 @@ trait ClientBase extends Logging { val amMemory = calculateAMMemory(newApp) - val JAVA_OPTS = ListBuffer[String]() + val javaOpts = ListBuffer[String]() // Add Xmx for AM memory - JAVA_OPTS += "-Xmx" + amMemory + "m" + javaOpts += "-Xmx" + amMemory + "m" val tmpDir = new Path(Environment.PWD.$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) - JAVA_OPTS += "-Djava.io.tmpdir=" + tmpDir + javaOpts += "-Djava.io.tmpdir=" + tmpDir // TODO: Remove once cpuset version is pushed out. 
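The private qualifyForLocal helper in ClientBase above keeps Hadoop's convention: a URI with no scheme is resolved against the local filesystem before being wrapped in a Path, while anything already carrying a scheme passes through unchanged. A minimal standalone sketch of that rule, assuming only hadoop-common on the classpath; the object and method names here are illustrative, not part of the patch.

    import java.net.URI

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileSystem, Path}

    object LocalUriQualifier {
      // A URI without a scheme is treated as a local filesystem path, matching Hadoop's
      // behaviour; anything with a scheme (hdfs://, file://, ...) is passed through untouched.
      def qualify(uri: URI, conf: Configuration): Path = {
        if (uri.getScheme == null) {
          FileSystem.getLocal(conf).makeQualified(new Path(uri))
        } else {
          new Path(uri)
        }
      }

      def main(args: Array[String]): Unit = {
        val conf = new Configuration()
        println(qualify(new URI("/tmp/app.jar"), conf))            // e.g. file:/tmp/app.jar
        println(qualify(new URI("hdfs://nn:8020/apps/app.jar"), conf))
      }
    }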
// The context is, default gc for server class machines ends up using all cores to do gc - @@ -325,11 +323,11 @@ trait ClientBase extends Logging { if (useConcurrentAndIncrementalGC) { // In our expts, using (default) throughput collector has severe perf ramifications in // multi-tenant machines - JAVA_OPTS += "-XX:+UseConcMarkSweepGC" - JAVA_OPTS += "-XX:+CMSIncrementalMode" - JAVA_OPTS += "-XX:+CMSIncrementalPacing" - JAVA_OPTS += "-XX:CMSIncrementalDutyCycleMin=0" - JAVA_OPTS += "-XX:CMSIncrementalDutyCycle=10" + javaOpts += "-XX:+UseConcMarkSweepGC" + javaOpts += "-XX:+CMSIncrementalMode" + javaOpts += "-XX:+CMSIncrementalPacing" + javaOpts += "-XX:CMSIncrementalDutyCycleMin=0" + javaOpts += "-XX:CMSIncrementalDutyCycle=10" } // SPARK_JAVA_OPTS is deprecated, but for backwards compatibility: @@ -344,22 +342,22 @@ trait ClientBase extends Logging { // If we are being launched in client mode, forward the spark-conf options // onto the executor launcher for ((k, v) <- sparkConf.getAll) { - JAVA_OPTS += "-D" + k + "=" + "\\\"" + v + "\\\"" + javaOpts += "-D" + k + "=" + "\\\"" + v + "\\\"" } } else { // If we are being launched in standalone mode, capture and forward any spark // system properties (e.g. set by spark-class). for ((k, v) <- sys.props.filterKeys(_.startsWith("spark"))) { - JAVA_OPTS += "-D" + k + "=" + "\\\"" + v + "\\\"" + javaOpts += "-D" + k + "=" + "\\\"" + v + "\\\"" } - sys.props.get("spark.driver.extraJavaOptions").foreach(opts => JAVA_OPTS += opts) - sys.props.get("spark.driver.libraryPath").foreach(p => JAVA_OPTS += s"-Djava.library.path=$p") + sys.props.get("spark.driver.extraJavaOptions").foreach(opts => javaOpts += opts) + sys.props.get("spark.driver.libraryPath").foreach(p => javaOpts += s"-Djava.library.path=$p") } - JAVA_OPTS += ClientBase.getLog4jConfiguration(localResources) + javaOpts += ClientBase.getLog4jConfiguration(localResources) // Command for the ApplicationMaster val commands = Seq(Environment.JAVA_HOME.$() + "/bin/java", "-server") ++ - JAVA_OPTS ++ + javaOpts ++ Seq(args.amClass, "--class", args.userClass, "--jar ", args.userJar, userArgsToString(args), "--executor-memory", args.executorMemory.toString, diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala index 32f8861dc9503..43dbb2464f929 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration -import org.apache.hadoop.yarn.util.{Apps, ConverterUtils, Records} +import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark.{Logging, SparkConf} @@ -46,19 +46,19 @@ trait ExecutorRunnableUtil extends Logging { executorCores: Int, localResources: HashMap[String, LocalResource]): List[String] = { // Extra options for the JVM - val JAVA_OPTS = ListBuffer[String]() + val javaOpts = ListBuffer[String]() // Set the JVM memory val executorMemoryString = executorMemory + "m" - JAVA_OPTS += "-Xms" + executorMemoryString + " -Xmx" + executorMemoryString + " " + javaOpts += "-Xms" + executorMemoryString + " -Xmx" + executorMemoryString + " " // Set extra Java options for the executor, if defined 
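The JAVA_OPTS to javaOpts rename above is stylistic, but the surrounding hunk shows how the AM launch command is assembled: a ListBuffer of JVM options (heap size, tmpdir, forwarded spark.* properties escaped in \" quotes) concatenated with the main class. A rough, self-contained sketch of that assembly, assuming a plain Map stands in for SparkConf; buildAmCommand and the literal values are illustrative.

    import scala.collection.mutable.ListBuffer

    object AmCommandSketch {
      // Build a java command line the same general way the patch does: memory flag, tmpdir,
      // forwarded spark.* properties as -D options, then the ApplicationMaster class.
      def buildAmCommand(amMemoryMb: Int, tmpDir: String, sparkProps: Map[String, String],
          amClass: String): Seq[String] = {
        val javaOpts = ListBuffer[String]()
        javaOpts += "-Xmx" + amMemoryMb + "m"
        javaOpts += "-Djava.io.tmpdir=" + tmpDir
        // The patch wraps each value in \" so YARN's shell expansion keeps it as one token.
        for ((k, v) <- sparkProps if k.startsWith("spark")) {
          javaOpts += "-D" + k + "=" + "\\\"" + v + "\\\""
        }
        // The real patch prefixes with Environment.JAVA_HOME.$() + "/bin/java".
        Seq("java", "-server") ++ javaOpts ++ Seq(amClass)
      }

      def main(args: Array[String]): Unit = {
        val cmd = buildAmCommand(512, "./tmp",
          Map("spark.app.name" -> "demo app"), "org.apache.spark.deploy.yarn.ApplicationMaster")
        println(cmd.mkString(" "))
      }
    }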
sys.props.get("spark.executor.extraJavaOptions").foreach { opts => - JAVA_OPTS += opts + javaOpts += opts } - JAVA_OPTS += "-Djava.io.tmpdir=" + + javaOpts += "-Djava.io.tmpdir=" + new Path(Environment.PWD.$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) - JAVA_OPTS += ClientBase.getLog4jConfiguration(localResources) + javaOpts += ClientBase.getLog4jConfiguration(localResources) // Certain configs need to be passed here because they are needed before the Executor // registers with the Scheduler and transfers the spark configs. Since the Executor backend @@ -66,10 +66,10 @@ trait ExecutorRunnableUtil extends Logging { // authentication settings. sparkConf.getAll. filter { case (k, v) => k.startsWith("spark.auth") || k.startsWith("spark.akka") }. - foreach { case (k, v) => JAVA_OPTS += "-D" + k + "=" + "\\\"" + v + "\\\"" } + foreach { case (k, v) => javaOpts += "-D" + k + "=" + "\\\"" + v + "\\\"" } sparkConf.getAkkaConf. - foreach { case (k, v) => JAVA_OPTS += "-D" + k + "=" + "\\\"" + v + "\\\"" } + foreach { case (k, v) => javaOpts += "-D" + k + "=" + "\\\"" + v + "\\\"" } // Commenting it out for now - so that people can refer to the properties if required. Remove // it once cpuset version is pushed out. @@ -88,11 +88,11 @@ trait ExecutorRunnableUtil extends Logging { // multi-tennent machines // The options are based on // http://www.oracle.com/technetwork/java/gc-tuning-5-138395.html#0.0.0.%20When%20to%20Use%20the%20Concurrent%20Low%20Pause%20Collector|outline - JAVA_OPTS += " -XX:+UseConcMarkSweepGC " - JAVA_OPTS += " -XX:+CMSIncrementalMode " - JAVA_OPTS += " -XX:+CMSIncrementalPacing " - JAVA_OPTS += " -XX:CMSIncrementalDutyCycleMin=0 " - JAVA_OPTS += " -XX:CMSIncrementalDutyCycle=10 " + javaOpts += " -XX:+UseConcMarkSweepGC " + javaOpts += " -XX:+CMSIncrementalMode " + javaOpts += " -XX:+CMSIncrementalPacing " + javaOpts += " -XX:CMSIncrementalDutyCycleMin=0 " + javaOpts += " -XX:CMSIncrementalDutyCycle=10 " } */ @@ -104,7 +104,7 @@ trait ExecutorRunnableUtil extends Logging { // TODO: If the OOM is not recoverable by rescheduling it on different node, then do // 'something' to fail job ... akin to blacklisting trackers in mapred ? "-XX:OnOutOfMemoryError='kill %p'") ++ - JAVA_OPTS ++ + javaOpts ++ Seq("org.apache.spark.executor.CoarseGrainedExecutorBackend", masterAddress.toString, slaveId.toString, diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala index a4638cc863611..39cdd2e8a522b 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala @@ -33,10 +33,11 @@ private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration) def this(sc: SparkContext) = this(sc, new Configuration()) - // Nothing else for now ... initialize application master : which needs sparkContext to determine how to allocate - // Note that only the first creation of SparkContext influences (and ideally, there must be only one SparkContext, right ?) - // Subsequent creations are ignored - since nodes are already allocated by then. - + // Nothing else for now ... initialize application master : which needs a SparkContext to + // determine how to allocate. + // Note that only the first creation of a SparkContext influences (and ideally, there must be + // only one SparkContext, right ?). 
Subsequent creations are ignored since executors are already + // allocated by then. // By default, rack is unknown override def getRackForHost(hostPort: String): Option[String] = { @@ -48,6 +49,7 @@ private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration) override def postStartHook() { val sparkContextInitialized = ApplicationMaster.sparkContextInitialized(sc) if (sparkContextInitialized){ + ApplicationMaster.waitForInitialAllocations() // Wait for a few seconds for the slaves to bootstrap and register with master - best case attempt Thread.sleep(3000L) } diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 33a60d978c586..6244332f23737 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,13 +19,12 @@ package org.apache.spark.deploy.yarn import java.io.IOException import java.util.concurrent.CopyOnWriteArrayList -import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} +import java.util.concurrent.atomic.AtomicReference import scala.collection.JavaConversions._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.hadoop.net.NetUtils import org.apache.hadoop.util.ShutdownHookManager import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.protocolrecords._ @@ -33,8 +32,7 @@ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.AMRMClient import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest import org.apache.hadoop.yarn.conf.YarnConfiguration -import org.apache.hadoop.yarn.ipc.YarnRPC -import org.apache.hadoop.yarn.util.{ConverterUtils, Records} +import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.hadoop.yarn.webapp.util.WebAppUtils import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} @@ -77,17 +75,18 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, // than user specified and /tmp. System.setProperty("spark.local.dir", getLocalDirs()) - // set the web ui port to be ephemeral for yarn so we don't conflict with + // Set the web ui port to be ephemeral for yarn so we don't conflict with // other spark processes running on the same box System.setProperty("spark.ui.port", "0") - // when running the AM, the Spark master is always "yarn-cluster" + // When running the AM, the Spark master is always "yarn-cluster" System.setProperty("spark.master", "yarn-cluster") - // Use priority 30 as it's higher then HDFS. It's same priority as MapReduce is using. + // Use priority 30 as it's higher than HDFS. It's the same priority MapReduce is using. ShutdownHookManager.get().addShutdownHook(new AppMasterShutdownHook(this), 30) - appAttemptId = getApplicationAttemptId() + appAttemptId = ApplicationMaster.getApplicationAttemptId() + logInfo("ApplicationAttemptId: " + appAttemptId) isLastAMRetry = appAttemptId.getAttemptId() >= maxAppAttempts amClient = AMRMClient.createAMRMClient() amClient.init(yarnConf) @@ -99,7 +98,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, ApplicationMaster.register(this) // Call this to force generation of secret so it gets populated into the - // hadoop UGI. This has to happen before the startUserClass which does a + // Hadoop UGI. 
This has to happen before the startUserClass which does a // doAs in order for the credentials to be passed on to the executor containers. val securityMgr = new SecurityManager(sparkConf) @@ -121,7 +120,10 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, // Allocate all containers allocateExecutors() - // Wait for the user class to Finish + // Launch thread that will heartbeat to the RM so it won't think the app has died. + launchReporterThread() + + // Wait for the user class to finish userThread.join() System.exit(0) @@ -141,7 +143,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.params", params) } - /** Get the Yarn approved local directories. */ + // Get the Yarn approved local directories. private def getLocalDirs(): String = { // Hadoop 0.23 and 2.x have different Environment variable names for the // local dirs, so lets check both. We assume one of the 2 is set. @@ -150,18 +152,9 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, .orElse(Option(System.getenv("LOCAL_DIRS"))) localDirs match { - case None => throw new Exception("Yarn Local dirs can't be empty") + case None => throw new Exception("Yarn local dirs can't be empty") case Some(l) => l } - } - - private def getApplicationAttemptId(): ApplicationAttemptId = { - val envs = System.getenv() - val containerIdString = envs.get(ApplicationConstants.Environment.CONTAINER_ID.name()) - val containerId = ConverterUtils.toContainerId(containerIdString) - val appAttemptId = containerId.getApplicationAttemptId() - logInfo("ApplicationAttemptId: " + appAttemptId) - appAttemptId } private def registerApplicationMaster(): RegisterApplicationMasterResponse = { @@ -173,25 +166,23 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, logInfo("Starting the user JAR in a separate Thread") val mainMethod = Class.forName( args.userClass, - false /* initialize */ , + false, Thread.currentThread.getContextClassLoader).getMethod("main", classOf[Array[String]]) val t = new Thread { override def run() { - - var successed = false + var succeeded = false try { // Copy - var mainArgs: Array[String] = new Array[String](args.userArgs.size) + val mainArgs = new Array[String](args.userArgs.size) args.userArgs.copyToArray(mainArgs, 0, args.userArgs.size) mainMethod.invoke(null, mainArgs) - // some job script has "System.exit(0)" at the end, for example SparkPi, SparkLR - // userThread will stop here unless it has uncaught exception thrown out - // It need shutdown hook to set SUCCEEDED - successed = true + // Some apps have "System.exit(0)" at the end. The user thread will stop here unless + // it has an uncaught exception thrown out. It needs a shutdown hook to set SUCCEEDED. 
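startUserClass above finds the user application's main(Array[String]) reflectively and runs it on a thread now named "Driver", tracking whether it returned normally. The sketch below reproduces that mechanism in isolation, with a println standing in for the patch's finishApplicationMaster call; DemoApp and the status messages are illustrative.

    object DemoApp {
      def main(args: Array[String]): Unit = println("demo main got: " + args.mkString(", "))
    }

    object UserMainRunner {
      @volatile private var succeeded = false

      // Load `className`, find its main(Array[String]) and invoke it on a dedicated "Driver"
      // thread, recording whether it returned without throwing.
      def startUserClass(className: String, userArgs: Array[String]): Thread = {
        val mainMethod = Class.forName(className, false,
          Thread.currentThread.getContextClassLoader).getMethod("main", classOf[Array[String]])
        val t = new Thread {
          override def run(): Unit = {
            try {
              mainMethod.invoke(null, userArgs)
              succeeded = true
            } finally {
              println(if (succeeded) "user class succeeded" else "user class failed")
            }
          }
        }
        t.setName("Driver")
        t.start()
        t
      }

      def main(args: Array[String]): Unit = {
        startUserClass("DemoApp", Array("hello")).join()
      }
    }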
+ succeeded = true } finally { - logDebug("finishing main") + logDebug("Finishing main") isLastAMRetry = true - if (successed) { + if (succeeded) { ApplicationMaster.this.finishApplicationMaster(FinalApplicationStatus.SUCCEEDED) } else { ApplicationMaster.this.finishApplicationMaster(FinalApplicationStatus.FAILED) @@ -199,11 +190,12 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } } } + t.setName("Driver") t.start() t } - // This need to happen before allocateExecutors() + // This needs to happen before allocateExecutors() private def waitForSparkContextInitialized() { logInfo("Waiting for Spark context initialization") try { @@ -231,7 +223,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, sparkContext.preferredNodeLocationData, sparkContext.getConf) } else { - logWarning("Unable to retrieve SparkContext inspite of waiting for %d, maxNumTries = %d". + logWarning("Unable to retrieve SparkContext in spite of waiting for %d, maxNumTries = %d". format(numTries * waitTime, maxNumTries)) this.yarnAllocator = YarnAllocationHandler.newAllocator( yarnConf, @@ -242,48 +234,37 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } } } finally { - // In case of exceptions, etc - ensure that count is at least ALLOCATOR_LOOP_WAIT_COUNT : - // so that the loop (in ApplicationMaster.sparkContextInitialized) breaks. - ApplicationMaster.incrementAllocatorLoop(ApplicationMaster.ALLOCATOR_LOOP_WAIT_COUNT) + // In case of exceptions, etc - ensure that the loop in + // ApplicationMaster#sparkContextInitialized() breaks. + ApplicationMaster.doneWithInitialAllocations() } } private def allocateExecutors() { try { - logInfo("Allocating " + args.numExecutors + " executors.") - // Wait until all containers have finished + logInfo("Requesting" + args.numExecutors + " executors.") + // Wait until all containers have launched yarnAllocator.addResourceRequests(args.numExecutors) yarnAllocator.allocateResources() // Exits the loop if the user thread exits. + + var iters = 0 while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) { checkNumExecutorsFailed() allocateMissingExecutor() yarnAllocator.allocateResources() - ApplicationMaster.incrementAllocatorLoop(1) - Thread.sleep(100) + if (iters == ApplicationMaster.ALLOCATOR_LOOP_WAIT_COUNT) { + ApplicationMaster.doneWithInitialAllocations() + } + Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL) + iters += 1 } } finally { - // In case of exceptions, etc - ensure that count is at least ALLOCATOR_LOOP_WAIT_COUNT, - // so that the loop in ApplicationMaster#sparkContextInitialized() breaks. - ApplicationMaster.incrementAllocatorLoop(ApplicationMaster.ALLOCATOR_LOOP_WAIT_COUNT) + // In case of exceptions, etc - ensure that the loop in + // ApplicationMaster#sparkContextInitialized() breaks. + ApplicationMaster.doneWithInitialAllocations() } logInfo("All executors have launched.") - - // Launch a progress reporter thread, else the app will get killed after expiration - // (def: 10mins) timeout. - if (userThread.isAlive) { - // Ensure that progress is sent before YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS elapses. - val timeoutInterval = yarnConf.getInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 120000) - - // we want to be reasonably responsive without causing too many requests to RM. - val schedulerInterval = - sparkConf.getLong("spark.yarn.scheduler.heartbeat.interval-ms", 5000) - - // must be <= timeoutInterval / 2. 
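The reworked allocateExecutors loop above heartbeats the allocator every ALLOCATE_HEARTBEAT_INTERVAL milliseconds and, once ALLOCATOR_LOOP_WAIT_COUNT polls have passed, releases anyone blocked on the initial allocation even if not every executor is up yet. A stripped-down sketch of that polling shape; the callbacks (running, allocate, signalInitialAllocations) and the demo in main are stand-ins, not Spark APIs.

    object AllocationLoopSketch {
      private val AllocatorLoopWaitCount = 30
      private val AllocateHeartbeatIntervalMs = 100L

      // Poll `running` until it reaches `target`, heartbeating via `allocate`. After
      // AllocatorLoopWaitCount polls, fire `signalInitialAllocations` once so callers waiting
      // for the initial allocation stop blocking even if the cluster is slow.
      def allocateUntil(target: Int)(running: () => Int, allocate: () => Unit,
          signalInitialAllocations: () => Unit): Unit = {
        var iters = 0
        var signalled = false
        while (running() < target) {
          allocate()
          if (!signalled && iters >= AllocatorLoopWaitCount) {
            signalInitialAllocations()
            signalled = true
          }
          Thread.sleep(AllocateHeartbeatIntervalMs)
          iters += 1
        }
        if (!signalled) signalInitialAllocations()   // mirrors the patch's finally block
      }

      def main(args: Array[String]): Unit = {
        var started = 0
        allocateUntil(target = 3)(
          running = () => started,
          allocate = () => started += 1,             // pretend one container comes up per beat
          signalInitialAllocations = () => println("initial allocations done"))
        println(s"all $started executors launched")
      }
    }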
- val interval = math.min(timeoutInterval / 2, schedulerInterval) - - launchReporterThread(interval) - } } private def allocateMissingExecutor() { @@ -303,47 +284,35 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } } - private def launchReporterThread(_sleepTime: Long): Thread = { - val sleepTime = if (_sleepTime <= 0) 0 else _sleepTime + private def launchReporterThread(): Thread = { + // Ensure that progress is sent before YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS elapses. + val expiryInterval = yarnConf.getInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 120000) + + // we want to be reasonably responsive without causing too many requests to RM. + val schedulerInterval = + sparkConf.getLong("spark.yarn.scheduler.heartbeat.interval-ms", 5000) + + // must be <= timeoutInterval / 2. + val interval = math.max(0, math.min(expiryInterval / 2, schedulerInterval)) val t = new Thread { override def run() { while (userThread.isAlive) { checkNumExecutorsFailed() allocateMissingExecutor() - sendProgress() - Thread.sleep(sleepTime) + logDebug("Sending progress") + yarnAllocator.allocateResources() + Thread.sleep(interval) } } } // Setting to daemon status, though this is usually not a good idea. t.setDaemon(true) t.start() - logInfo("Started progress reporter thread - sleep time : " + sleepTime) + logInfo("Started progress reporter thread - heartbeat interval : " + interval) t } - private def sendProgress() { - logDebug("Sending progress") - // Simulated with an allocate request with no nodes requested. - yarnAllocator.allocateResources() - } - - /* - def printContainers(containers: List[Container]) = { - for (container <- containers) { - logInfo("Launching shell command on a new container." - + ", containerId=" + container.getId() - + ", containerNode=" + container.getNodeId().getHost() - + ":" + container.getNodeId().getPort() - + ", containerNodeURI=" + container.getNodeHttpAddress() - + ", containerState" + container.getState() - + ", containerResourceMemory" - + container.getResource().getMemory()) - } - } - */ - def finishApplicationMaster(status: FinalApplicationStatus, diagnostics: String = "") { synchronized { if (isFinished) { @@ -351,7 +320,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } isFinished = true - logInfo("finishApplicationMaster with " + status) + logInfo("Unregistering ApplicationMaster with " + status) if (registered) { val trackingUrl = sparkConf.get("spark.yarn.historyServer.address", "") amClient.unregisterApplicationMaster(status, diagnostics, trackingUrl) @@ -386,7 +355,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, def run() { logInfo("AppMaster received a signal.") - // we need to clean up staging dir before HDFS is shut down + // We need to clean up staging dir before HDFS is shut down // make sure we don't delete it until this is the last AM if (appMaster.isLastAMRetry) appMaster.cleanupStagingDir() } @@ -401,21 +370,24 @@ object ApplicationMaster { // TODO: Currently, task to container is computed once (TaskSetManager) - which need not be // optimal as more containers are available. Might need to handle this better. 
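The new launchReporterThread above clamps the heartbeat interval so the AM reports well inside YARN's expiry window: min(expiry / 2, configured interval), floored at zero, driven from a daemon thread. A small sketch of that computation and loop; stillAlive and heartbeat are placeholders for the patch's userThread.isAlive check and the empty allocateResources() call.

    object ReporterThreadSketch {
      // The AM must report in well before the RM expiry interval; the patch uses
      // min(expiry / 2, configured interval), never less than zero.
      def heartbeatInterval(expiryMs: Long, configuredMs: Long): Long =
        math.max(0L, math.min(expiryMs / 2, configuredMs))

      def launchReporter(intervalMs: Long)(stillAlive: () => Boolean,
          heartbeat: () => Unit): Thread = {
        val t = new Thread {
          override def run(): Unit = {
            while (stillAlive()) {
              heartbeat()              // an empty allocate request doubles as the heartbeat
              Thread.sleep(intervalMs)
            }
          }
        }
        t.setDaemon(true)              // don't keep the JVM alive just for reporting
        t.start()
        t
      }

      def main(args: Array[String]): Unit = {
        println(heartbeatInterval(120000, 5000))   // 5000: the configured value wins
        println(heartbeatInterval(6000, 5000))     // 3000: capped at half the expiry interval
        var beats = 0
        val t = launchReporter(10)(() => beats < 5, () => { beats += 1; println(s"beat $beats") })
        t.join()
      }
    }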
private val ALLOCATOR_LOOP_WAIT_COUNT = 30 + private val ALLOCATE_HEARTBEAT_INTERVAL = 100 private val applicationMasters = new CopyOnWriteArrayList[ApplicationMaster]() val sparkContextRef: AtomicReference[SparkContext] = - new AtomicReference[SparkContext](null /* initialValue */) + new AtomicReference[SparkContext](null) - val yarnAllocatorLoop: AtomicInteger = new AtomicInteger(0) + // Variable used to notify the YarnClusterScheduler that it should stop waiting + // for the initial set of executors to be started and get on with its business. + val doneWithInitialAllocationsMonitor = new Object() - def incrementAllocatorLoop(by: Int) { - val count = yarnAllocatorLoop.getAndAdd(by) - if (count >= ALLOCATOR_LOOP_WAIT_COUNT) { - yarnAllocatorLoop.synchronized { - // to wake threads off wait ... - yarnAllocatorLoop.notifyAll() - } + @volatile var isDoneWithInitialAllocations = false + + def doneWithInitialAllocations() { + isDoneWithInitialAllocations = true + doneWithInitialAllocationsMonitor.synchronized { + // to wake threads off wait ... + doneWithInitialAllocationsMonitor.notifyAll() } } @@ -423,7 +395,10 @@ object ApplicationMaster { applicationMasters.add(master) } - // TODO(harvey): See whether this should be discarded - it isn't used anywhere atm... + /** + * Called from YarnClusterScheduler to notify the AM code that a SparkContext has been + * initialized in the user code. + */ def sparkContextInitialized(sc: SparkContext): Boolean = { var modified = false sparkContextRef.synchronized { @@ -431,7 +406,7 @@ object ApplicationMaster { sparkContextRef.notifyAll() } - // Add a shutdown hook - as a best case effort in case users do not call sc.stop or do + // Add a shutdown hook - as a best effort in case users do not call sc.stop or do // System.exit. // Should not really have to do this, but it helps YARN to evict resources earlier. // Not to mention, prevent the Client from declaring failure even though we exited properly. @@ -454,13 +429,29 @@ object ApplicationMaster { }) } - // Wait for initialization to complete and atleast 'some' nodes can get allocated. - yarnAllocatorLoop.synchronized { - while (yarnAllocatorLoop.get() <= ALLOCATOR_LOOP_WAIT_COUNT) { - yarnAllocatorLoop.wait(1000L) + // Wait for initialization to complete and at least 'some' nodes to get allocated. + modified + } + + /** + * Returns when we've either + * 1) received all the requested executors, + * 2) waited ALLOCATOR_LOOP_WAIT_COUNT * ALLOCATE_HEARTBEAT_INTERVAL ms, + * 3) hit an error that causes us to terminate trying to get containers. 
+ */ + def waitForInitialAllocations() { + doneWithInitialAllocationsMonitor.synchronized { + while (!isDoneWithInitialAllocations) { + doneWithInitialAllocationsMonitor.wait(1000L) } } - modified + } + + def getApplicationAttemptId(): ApplicationAttemptId = { + val containerIdString = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) + val containerId = ConverterUtils.toContainerId(containerIdString) + val appAttemptId = containerId.getApplicationAttemptId() + appAttemptId } def main(argStrings: Array[String]) { diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 393edd1f2d670..24027618c1f35 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -21,14 +21,12 @@ import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.DataOutputBuffer -import org.apache.hadoop.yarn.api._ -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.hadoop.yarn.api.protocolrecords._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.YarnClient import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.ipc.YarnRPC -import org.apache.hadoop.yarn.util.{Apps, Records} +import org.apache.hadoop.yarn.util.Records import org.apache.spark.{Logging, SparkConf} @@ -102,7 +100,7 @@ class Client(clientArgs: ClientArguments, hadoopConf: Configuration, spConf: Spa def logClusterResourceDetails() { val clusterMetrics: YarnClusterMetrics = yarnClient.getYarnClusterMetrics - logInfo("Got Cluster metric info from ApplicationsManager (ASM), number of NodeManagers: " + + logInfo("Got Cluster metric info from ResourceManager, number of NodeManagers: " + clusterMetrics.getNumNodeManagers) val queueInfo: QueueInfo = yarnClient.getQueueInfo(args.amQueue) @@ -133,7 +131,7 @@ class Client(clientArgs: ClientArguments, hadoopConf: Configuration, spConf: Spa def submitApp(appContext: ApplicationSubmissionContext) = { // Submit the application to the applications manager. - logInfo("Submitting application to ASM") + logInfo("Submitting application to ResourceManager") yarnClient.submitApplication(appContext) } @@ -149,7 +147,7 @@ class Client(clientArgs: ClientArguments, hadoopConf: Configuration, spConf: Spa Thread.sleep(interval) val report = yarnClient.getApplicationReport(appId) - logInfo("Application report from ASM: \n" + + logInfo("Application report from ResourceManager: \n" + "\t application identifier: " + appId.toString() + "\n" + "\t appId: " + appId.getId() + "\n" + "\t clientToAMToken: " + report.getClientToAMToken() + "\n" + diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala index d93e5bb0225d5..f71ad036ce0f2 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala @@ -72,8 +72,8 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp override def preStart() { logInfo("Listen to driver: " + driverUrl) driver = context.actorSelection(driverUrl) - // Send a hello message thus the connection is actually established, - // thus we can monitor Lifecycle Events. 
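waitForInitialAllocations above replaces the old AtomicInteger counting with a plain monitor plus a volatile flag: the allocator thread flips the flag and notifies, and the scheduler waits in one-second slices until it observes the flag. A condensed sketch of that handshake; the object and method names are illustrative.

    object InitialAllocationGate {
      private val monitor = new Object()
      @volatile private var done = false

      // Called by the allocator thread once enough of the initial executors are up
      // (or once it gives up); wakes every waiter.
      def signalDone(): Unit = {
        done = true
        monitor.synchronized { monitor.notifyAll() }
      }

      // Called by the scheduler before it starts handing out tasks. Waiting in one-second
      // slices, as the patch does, makes the waiter re-check the flag periodically instead
      // of relying solely on the notification.
      def await(): Unit = monitor.synchronized {
        while (!done) {
          monitor.wait(1000L)
        }
      }

      def main(args: Array[String]): Unit = {
        val waiter = new Thread {
          override def run(): Unit = { await(); println("initial allocations observed") }
        }
        waiter.start()
        Thread.sleep(200)
        signalDone()
        waiter.join()
      }
    }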
+ // Send a hello message to establish the connection, after which + // we can monitor Lifecycle Events. driver ! "Hello" context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) } @@ -95,7 +95,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp amClient.init(yarnConf) amClient.start() - appAttemptId = getApplicationAttemptId() + appAttemptId = ApplicationMaster.getApplicationAttemptId() registerApplicationMaster() waitForSparkMaster() @@ -115,7 +115,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp val interval = math.min(timeoutInterval / 2, schedulerInterval) reporterThread = launchReporterThread(interval) - + // Wait for the reporter thread to Finish. reporterThread.join() @@ -134,25 +134,16 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X val localDirs = Option(System.getenv("YARN_LOCAL_DIRS")) .orElse(Option(System.getenv("LOCAL_DIRS"))) - + localDirs match { case None => throw new Exception("Yarn Local dirs can't be empty") case Some(l) => l } - } - - private def getApplicationAttemptId(): ApplicationAttemptId = { - val envs = System.getenv() - val containerIdString = envs.get(ApplicationConstants.Environment.CONTAINER_ID.name()) - val containerId = ConverterUtils.toContainerId(containerIdString) - val appAttemptId = containerId.getApplicationAttemptId() - logInfo("ApplicationAttemptId: " + appAttemptId) - appAttemptId } private def registerApplicationMaster(): RegisterApplicationMasterResponse = { logInfo("Registering the ApplicationMaster") - // TODO:(Raymond) Find out Spark UI address and fill in here? + // TODO: Find out client's Spark UI address and fill in here? amClient.registerApplicationMaster(Utils.localHostName(), 0, "") } @@ -185,8 +176,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp private def allocateExecutors() { - - // Fixme: should get preferredNodeLocationData from SparkContext, just fake a empty one for now. + // TODO: should get preferredNodeLocationData from SparkContext, just fake a empty one for now. val preferredNodeLocationData: scala.collection.Map[String, scala.collection.Set[SplitInfo]] = scala.collection.immutable.Map() @@ -198,8 +188,8 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp preferredNodeLocationData, sparkConf) - logInfo("Allocating " + args.numExecutors + " executors.") - // Wait until all containers have finished + logInfo("Requesting " + args.numExecutors + " executors.") + // Wait until all containers have launched yarnAllocator.addResourceRequests(args.numExecutors) yarnAllocator.allocateResources() while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors) && (!driverClosed)) { @@ -221,7 +211,6 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp } } - // TODO: We might want to extend this to allocate more containers in case they die ! 
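Both ApplicationMaster and ExecutorLauncher now share getApplicationAttemptId, which derives the attempt id from the CONTAINER_ID environment variable YARN sets inside every container. A sketch of that lookup using the same ConverterUtils helper the patch imports; it assumes the hadoop-yarn-api and hadoop-yarn-common jars and only succeeds when run inside a YARN container.

    import org.apache.hadoop.yarn.api.ApplicationConstants
    import org.apache.hadoop.yarn.api.records.ApplicationAttemptId
    import org.apache.hadoop.yarn.util.ConverterUtils

    object AttemptIdSketch {
      // YARN exports the container id (e.g. container_1400000000000_0001_01_000001) in the
      // environment; the application attempt id is embedded in it.
      def currentAttemptId(): ApplicationAttemptId = {
        val containerIdString =
          System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name())
        require(containerIdString != null, "CONTAINER_ID is only set inside a YARN container")
        ConverterUtils.toContainerId(containerIdString).getApplicationAttemptId()
      }

      def main(args: Array[String]): Unit = {
        // Outside YARN the require() fails; inside a container this prints the attempt id.
        println("ApplicationAttemptId: " + currentAttemptId())
      }
    }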
private def launchReporterThread(_sleepTime: Long): Thread = { val sleepTime = if (_sleepTime <= 0) 0 else _sleepTime @@ -229,7 +218,8 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp override def run() { while (!driverClosed) { allocateMissingExecutor() - sendProgress() + logDebug("Sending progress") + yarnAllocator.allocateResources() Thread.sleep(sleepTime) } } @@ -241,20 +231,14 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp t } - private def sendProgress() { - logDebug("Sending progress") - // simulated with an allocate request with no nodes requested ... - yarnAllocator.allocateResources() - } - def finishApplicationMaster(status: FinalApplicationStatus) { - logInfo("finish ApplicationMaster with " + status) - amClient.unregisterApplicationMaster(status, "" /* appMessage */ , "" /* appTrackingUrl */) + logInfo("Unregistering ApplicationMaster with " + status) + val trackingUrl = sparkConf.get("spark.yarn.historyServer.address", "") + amClient.unregisterApplicationMaster(status, "" /* appMessage */ , trackingUrl) } } - object ExecutorLauncher { def main(argStrings: Array[String]) { val args = new ApplicationMasterArguments(argStrings)
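Both launchers now pass the configured history server address as the tracking URL when unregistering, so the ResourceManager UI keeps a useful link after the application finishes. A sketch of that call against the stable AMRMClient API; it is only meaningful inside an already-registered AM and is shown just to make the tracking-URL argument explicit. The unregister helper name is illustrative.

    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus
    import org.apache.hadoop.yarn.client.api.AMRMClient
    import org.apache.spark.SparkConf

    object UnregisterSketch {
      // Unregister the AM, pointing the RM at the Spark history server (if configured) so the
      // "Tracking URL" column stays useful once the application has stopped.
      def unregister(amClient: AMRMClient[_], sparkConf: SparkConf,
          status: FinalApplicationStatus, diagnostics: String = ""): Unit = {
        val trackingUrl = sparkConf.get("spark.yarn.historyServer.address", "")
        amClient.unregisterApplicationMaster(status, diagnostics, trackingUrl)
      }
    }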